Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
H
hh_ccs
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
贺阳
hh_ccs
Commits
2e0670ae
提交
2e0670ae
authored
11月 06, 2025
作者:
贺阳
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
同步失败的提示
上级
9e6a63bc
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
229 行增加
和
203 行删除
+229
-203
batch_get_pod_info_wizard.py
ccs_base/wizard/batch_get_pod_info_wizard.py
+229
-203
没有找到文件。
ccs_base/wizard/batch_get_pod_info_wizard.py
浏览文件 @
2e0670ae
...
@@ -6,12 +6,14 @@ import io
...
@@ -6,12 +6,14 @@ import io
import
json
import
json
import
logging
import
logging
import
time
import
time
from
datetime
import
datetime
,
timedelta
import
requests
import
requests
from
odoo
import
models
,
fields
,
api
,
_
from
odoo
import
models
,
fields
,
api
,
_
from
odoo.exceptions
import
ValidationError
from
odoo.exceptions
import
ValidationError
from
.ai_image_edit_service
import
AIImageEditService
from
.ai_image_edit_service
import
AIImageEditService
from
datetime
import
datetime
,
timedelta
_logger
=
logging
.
getLogger
(
__name__
)
_logger
=
logging
.
getLogger
(
__name__
)
...
@@ -44,7 +46,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -44,7 +46,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
skip_ocr_direct_ai
=
fields
.
Boolean
(
skip_ocr_direct_ai
=
fields
.
Boolean
(
string
=
'Skip OCR Direct AI'
,
# 跳过OCR直接使用AI
string
=
'Skip OCR Direct AI'
,
# 跳过OCR直接使用AI
default
=
False
,
default
=
False
,
help
=
'Whether to skip OCR processing and directly use AI processing (for testing AI)'
# 是否跳过OCR处理,直接使用AI处理(用于测试AI)
help
=
'Whether to skip OCR processing and directly use AI processing (for testing AI)'
# 是否跳过OCR处理,直接使用AI处理(用于测试AI)
)
)
sync_match_node
=
fields
.
Boolean
(
sync_match_node
=
fields
.
Boolean
(
...
@@ -69,7 +72,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -69,7 +72,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
help
=
'Show error message'
help
=
'Show error message'
)
)
# PDF相关字段
# PDF相关字段
pdf_file
=
fields
.
Binary
(
string
=
'PDF文件'
,
help
=
'涂抹后的所有pdf文件合并为一个pdf文件'
)
pdf_file
=
fields
.
Binary
(
string
=
'PDF文件'
,
help
=
'涂抹后的所有pdf文件合并为一个pdf文件'
)
pdf_filename
=
fields
.
Char
(
string
=
'PDF文件名称'
)
pdf_filename
=
fields
.
Char
(
string
=
'PDF文件名称'
)
processed_files_data
=
fields
.
Text
(
string
=
'已处理的文件数据'
,
help
=
'存储已处理的文件信息(JSON格式)'
)
processed_files_data
=
fields
.
Text
(
string
=
'已处理的文件数据'
,
help
=
'存储已处理的文件信息(JSON格式)'
)
...
@@ -80,12 +83,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -80,12 +83,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
try
:
try
:
bl_objs
=
self
.
get_order
()
bl_objs
=
self
.
get_order
()
_logger
.
info
(
f
"开始预览操作,提单数量: {len(bl_objs)}"
)
_logger
.
info
(
f
"开始预览操作,提单数量: {len(bl_objs)}"
)
# 调用接口获取提单pdf文件
# 调用接口获取提单pdf文件
pdf_file_arr
=
self
.
_get_pdf_file_arr
()
pdf_file_arr
=
self
.
_get_pdf_file_arr
()
# 处理PDF文件,匹配提单对象
# 处理PDF文件,匹配提单对象
processed_files
=
self
.
_match_bl_by_file_name
(
pdf_file_arr
)
processed_files
=
self
.
_match_bl_by_file_name
(
pdf_file_arr
)
# 把没有匹配到文件的进行提示
# 把没有匹配到文件的进行提示
error_bl
=
[]
error_bl
=
[]
matched_bl_ids
=
[
f
[
'bl'
]
.
id
for
f
in
processed_files
if
f
.
get
(
'bl'
)]
matched_bl_ids
=
[
f
[
'bl'
]
.
id
for
f
in
processed_files
if
f
.
get
(
'bl'
)]
...
@@ -97,11 +100,11 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -97,11 +100,11 @@ class BatchGetPodInfoWizard(models.TransientModel):
if
not
self
.
_context
.
get
(
'is_skip_raise_error'
):
if
not
self
.
_context
.
get
(
'is_skip_raise_error'
):
self
.
show_error_message
=
_
(
'
%
s bill of loading cannot find release note file'
)
%
(
self
.
show_error_message
=
_
(
'
%
s bill of loading cannot find release note file'
)
%
(
', '
.
join
([
bl
.
bl_no
for
bl
in
error_bl
]))
', '
.
join
([
bl
.
bl_no
for
bl
in
error_bl
]))
# 如果启用了涂抹文字,进行处理
# 如果启用了涂抹文字,进行处理
if
self
.
remove_specified_text
and
processed_files
:
if
self
.
remove_specified_text
and
processed_files
:
processed_files
=
self
.
_remove_specified_text
(
processed_files
,
debug_mode
=
False
)
processed_files
=
self
.
_remove_specified_text
(
processed_files
,
debug_mode
=
False
)
# 分离成功和失败的文件
# 分离成功和失败的文件
successful_files
=
[]
successful_files
=
[]
failed_files
=
[]
failed_files
=
[]
...
@@ -116,18 +119,18 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -116,18 +119,18 @@ class BatchGetPodInfoWizard(models.TransientModel):
error_msg
=
str
(
self
.
show_error_message
)
error_msg
=
str
(
self
.
show_error_message
)
if
bl
and
bl
.
bl_no
in
error_msg
:
if
bl
and
bl
.
bl_no
in
error_msg
:
has_error
=
True
has_error
=
True
# 如果处理失败或者有错误,则认为失败
# 如果处理失败或者有错误,则认为失败
if
processing_failed
or
has_error
or
not
file_data
:
if
processing_failed
or
has_error
or
not
file_data
:
failed_files
.
append
(
file_info
)
failed_files
.
append
(
file_info
)
else
:
else
:
# 文件数据存在且处理成功
# 文件数据存在且处理成功
successful_files
.
append
(
file_info
)
successful_files
.
append
(
file_info
)
# 只合并成功的文件
# 只合并成功的文件
if
successful_files
:
if
successful_files
:
self
.
_merge_pdf_files
(
successful_files
)
self
.
_merge_pdf_files
(
successful_files
)
# 如果所有文件都成功了(没有失败的文件),自动勾选"是否同步成功涂抹的提单"
# 如果所有文件都成功了(没有失败的文件),自动勾选"是否同步成功涂抹的提单"
if
len
(
successful_files
)
==
len
(
processed_files
)
and
not
failed_files
:
if
len
(
successful_files
)
==
len
(
processed_files
)
and
not
failed_files
:
self
.
sync_successful_processed
=
True
self
.
sync_successful_processed
=
True
...
@@ -146,9 +149,10 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -146,9 +149,10 @@ class BatchGetPodInfoWizard(models.TransientModel):
successful_bl_nos_str
=
'、'
.
join
(
successful_bl_nos
)
if
successful_bl_nos
else
''
successful_bl_nos_str
=
'、'
.
join
(
successful_bl_nos
)
if
successful_bl_nos
else
''
success_msg
=
f
"
\n
成功处理的提单: {successful_bl_nos_str}"
if
successful_bl_nos_str
else
''
success_msg
=
f
"
\n
成功处理的提单: {successful_bl_nos_str}"
if
successful_bl_nos_str
else
''
self
.
show_error_message
=
f
"{existing_error}{success_msg}"
self
.
show_error_message
=
f
"{existing_error}{success_msg}"
_logger
.
info
(
f
"部分提单处理失败(成功:{len(successful_files)},失败:{len(failed_files)}),成功处理的提单号已显示"
)
_logger
.
info
(
f
"部分提单处理失败(成功:{len(successful_files)},失败:{len(failed_files)}),成功处理的提单号已显示"
)
self
.
sync_successful_processed
=
False
self
.
sync_successful_processed
=
False
# 序列化并存储处理后的文件数据(包括成功和失败的,但只有成功的才会合并PDF)
# 序列化并存储处理后的文件数据(包括成功和失败的,但只有成功的才会合并PDF)
if
processed_files
:
if
processed_files
:
self
.
processed_files_data
=
self
.
_serialize_processed_files
(
processed_files
)
self
.
processed_files_data
=
self
.
_serialize_processed_files
(
processed_files
)
...
@@ -172,7 +176,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -172,7 +176,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
else
:
else
:
self
.
processed_files_data
=
''
self
.
processed_files_data
=
''
self
.
sync_successful_processed
=
False
self
.
sync_successful_processed
=
False
# 返回表单视图
# 返回表单视图
return
{
return
{
'type'
:
'ir.actions.act_window'
,
'type'
:
'ir.actions.act_window'
,
...
@@ -205,13 +209,13 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -205,13 +209,13 @@ class BatchGetPodInfoWizard(models.TransientModel):
self
.
show_error_message
=
False
self
.
show_error_message
=
False
bl_objs
=
self
.
get_order
()
bl_objs
=
self
.
get_order
()
_logger
.
info
(
f
"
%
s提单开始执行批量获取POD信息操作"
%
len
(
bl_objs
))
_logger
.
info
(
f
"
%
s提单开始执行批量获取POD信息操作"
%
len
(
bl_objs
))
# 优先使用已处理的文件数据(预览时已处理)
# 优先使用已处理的文件数据(预览时已处理)
processed_files
=
None
processed_files
=
None
if
self
.
processed_files_data
:
if
self
.
processed_files_data
:
processed_files
=
self
.
_deserialize_processed_files
(
self
.
processed_files_data
)
processed_files
=
self
.
_deserialize_processed_files
(
self
.
processed_files_data
)
_logger
.
info
(
f
"使用已处理的文件数据,共 {len(processed_files)} 个文件"
)
_logger
.
info
(
f
"使用已处理的文件数据,共 {len(processed_files)} 个文件"
)
# 检查文件数据是否完整
# 检查文件数据是否完整
valid_files
=
[]
valid_files
=
[]
for
file_info
in
processed_files
:
for
file_info
in
processed_files
:
...
@@ -221,7 +225,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -221,7 +225,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
_logger
.
warning
(
f
"提单 {file_info.get('bl', {}).get('bl_no', 'Unknown')} 的文件数据为空"
)
_logger
.
warning
(
f
"提单 {file_info.get('bl', {}).get('bl_no', 'Unknown')} 的文件数据为空"
)
processed_files
=
valid_files
processed_files
=
valid_files
_logger
.
info
(
f
"有效文件数量: {len(processed_files)}"
)
_logger
.
info
(
f
"有效文件数量: {len(processed_files)}"
)
# 如果没有已处理的数据,则执行处理流程
# 如果没有已处理的数据,则执行处理流程
if
not
processed_files
:
if
not
processed_files
:
# 调用接口获取提单pdf文件
# 调用接口获取提单pdf文件
...
@@ -240,7 +244,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -240,7 +244,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
if
not
self
.
_context
.
get
(
'is_skip_raise_error'
):
if
not
self
.
_context
.
get
(
'is_skip_raise_error'
):
self
.
show_error_message
=
_
(
'
%
s bill of loading cannot find release note file'
)
%
(
self
.
show_error_message
=
_
(
'
%
s bill of loading cannot find release note file'
)
%
(
', '
.
join
([
bl
.
bl_no
for
bl
in
error_bl
]))
', '
.
join
([
bl
.
bl_no
for
bl
in
error_bl
]))
# 如果启用了涂抹文字,进行处理
# 如果启用了涂抹文字,进行处理
if
self
.
remove_specified_text
and
processed_files
:
if
self
.
remove_specified_text
and
processed_files
:
processed_files
=
self
.
_remove_specified_text
(
processed_files
,
debug_mode
=
False
)
processed_files
=
self
.
_remove_specified_text
(
processed_files
,
debug_mode
=
False
)
...
@@ -257,11 +261,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -257,11 +261,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
'view_mode'
:
'form'
,
'view_mode'
:
'form'
,
'res_id'
:
self
.
id
,
'res_id'
:
self
.
id
,
'target'
:
'new'
,
'target'
:
'new'
,
'context'
:
{
'active_id'
:
bl_objs
.
ids
,}
'context'
:
{
'active_id'
:
bl_objs
.
ids
,
}
}
}
# 检查是否有文字清除失败的错误
# 检查是否有文字清除失败的错误
if
self
.
show_error_message
and
any
(
'仍存在目标文字'
in
str
(
self
.
show_error_message
)
or
'未完全清除文字'
in
str
(
self
.
show_error_message
)):
if
self
.
show_error_message
and
any
(
'仍存在目标文字'
in
str
(
self
.
show_error_message
)
or
'未完全清除文字'
in
str
(
self
.
show_error_message
)):
_logger
.
error
(
f
"检测到文字清除失败,停止处理: {self.show_error_message}"
)
_logger
.
error
(
f
"检测到文字清除失败,停止处理: {self.show_error_message}"
)
return
{
return
{
'type'
:
'ir.actions.act_window'
,
'type'
:
'ir.actions.act_window'
,
...
@@ -272,7 +277,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -272,7 +277,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
'target'
:
'new'
,
'target'
:
'new'
,
'context'
:
{
'default_show_error_message'
:
self
.
show_error_message
,
'active_id'
:
bl_objs
.
ids
}
'context'
:
{
'default_show_error_message'
:
self
.
show_error_message
,
'active_id'
:
bl_objs
.
ids
}
}
}
# 只处理成功涂抹的提单
# 只处理成功涂抹的提单
# 直接根据processed_files中的processing_failed标志筛选成功处理的文件,无需从文本解析
# 直接根据processed_files中的processing_failed标志筛选成功处理的文件,无需从文本解析
successful_processed_files
=
[]
successful_processed_files
=
[]
...
@@ -291,19 +296,19 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -291,19 +296,19 @@ class BatchGetPodInfoWizard(models.TransientModel):
_logger
.
info
(
f
"从{len(processed_files)}个文件中筛选出{len(successful_processed_files)}个成功处理的文件"
)
_logger
.
info
(
f
"从{len(processed_files)}个文件中筛选出{len(successful_processed_files)}个成功处理的文件"
)
else
:
else
:
_logger
.
warning
(
"没有找到已处理的文件数据"
)
_logger
.
warning
(
"没有找到已处理的文件数据"
)
# 回写到附件信息
# 回写到附件信息
if
successful_processed_files
and
(
self
.
sync_last_mile_pod
or
self
.
sync_match_node
):
if
successful_processed_files
and
(
self
.
sync_last_mile_pod
or
self
.
sync_match_node
):
# 回写PDF文件到清关文件
# 回写PDF文件到清关文件
self
.
_write_pdf_file
(
successful_processed_files
)
self
.
_write_pdf_file
(
successful_processed_files
)
# 再同步和回写
# 再同步和回写
if
self
.
sync_last_mile_pod
and
successful_processed_files
:
if
self
.
sync_last_mile_pod
and
successful_processed_files
:
self
.
_sync_last_mile_pod
(
successful_processed_files
)
self
.
_sync_last_mile_pod
(
successful_processed_files
)
# 同步推送匹配节点
# 同步推送匹配节点
if
self
.
sync_match_node
and
successful_processed_files
:
if
self
.
sync_match_node
and
successful_processed_files
:
#且需先对比小包当前节点的操作时间是否小于提取时间(同时区对比)若大于则不能推送,
#
且需先对比小包当前节点的操作时间是否小于提取时间(同时区对比)若大于则不能推送,
# 若需补推节点,则需判断提取时间-写入节点(不取写入第一个节点)的前序间隔时间是否大于小包当前节点的操作时间。
# 若需补推节点,则需判断提取时间-写入节点(不取写入第一个节点)的前序间隔时间是否大于小包当前节点的操作时间。
# 若不满足以上条件,则不执行生成和自动推送节点,并在小包上新增推送备注(新增该字段)回写备注信息:获取尾程POD,自动推送节点失败,有风险产生倒挂。请手动操作205-10-20 10:20:20(获取时间)
# 若不满足以上条件,则不执行生成和自动推送节点,并在小包上新增推送备注(新增该字段)回写备注信息:获取尾程POD,自动推送节点失败,有风险产生倒挂。请手动操作205-10-20 10:20:20(获取时间)
valid_files
=
self
.
_validate_node_push_conditions
(
successful_processed_files
)
valid_files
=
self
.
_validate_node_push_conditions
(
successful_processed_files
)
...
@@ -311,11 +316,10 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -311,11 +316,10 @@ class BatchGetPodInfoWizard(models.TransientModel):
self
.
get_date_sync_match_node
(
valid_files
)
self
.
get_date_sync_match_node
(
valid_files
)
else
:
else
:
_logger
.
info
(
f
"没有满足条件的文件,不执行生成和自动推送节点"
)
_logger
.
info
(
f
"没有满足条件的文件,不执行生成和自动推送节点"
)
# 清理所有临时文件(包括数据库记录和物理文件),不能删,不然回写的时候没有文件了
# 清理所有临时文件(包括数据库记录和物理文件),不能删,不然回写的时候没有文件了
self
.
_cleanup_temp_attachments
(
bl_objs
)
self
.
_cleanup_temp_attachments
(
bl_objs
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
_logger
.
info
(
f
"批量获取POD信息操作完成,耗时: {end_time - start_time}秒"
)
_logger
.
info
(
f
"批量获取POD信息操作完成,耗时: {end_time - start_time}秒"
)
if
self
.
show_error_message
and
not
self
.
_context
.
get
(
'is_skip_raise_error'
):
if
self
.
show_error_message
and
not
self
.
_context
.
get
(
'is_skip_raise_error'
):
...
@@ -326,10 +330,9 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -326,10 +330,9 @@ class BatchGetPodInfoWizard(models.TransientModel):
'view_mode'
:
'form'
,
'view_mode'
:
'form'
,
'res_id'
:
self
.
id
,
'res_id'
:
self
.
id
,
'target'
:
'new'
,
'target'
:
'new'
,
'context'
:
{
'default_show_error_message'
:
self
.
show_error_message
,
'active_id'
:
bl_objs
.
ids
}
'context'
:
{
'default_show_error_message'
:
self
.
show_error_message
,
'active_id'
:
bl_objs
.
ids
}
}
}
def
_validate_node_push_conditions
(
self
,
processed_files
):
def
_validate_node_push_conditions
(
self
,
processed_files
):
"""
"""
验证节点推送条件
验证节点推送条件
...
@@ -428,9 +431,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -428,9 +431,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
file_data
=
file_info
.
get
(
'file_data'
,
''
)
file_data
=
file_info
.
get
(
'file_data'
,
''
)
if
not
file_data
:
if
not
file_data
:
continue
continue
# 如果有文件为空的就回写,否则就创建新的清关文件记录
# 如果有文件为空的就回写,否则就创建新的清关文件记录
fix_name
=
'尾程交接POD(待大包数量和箱号)'
fix_name
=
'尾程交接POD(待大包数量和箱号)'
clearance_file
=
self
.
env
[
'cc.clearance.file'
]
.
search
(
clearance_file
=
self
.
env
[
'cc.clearance.file'
]
.
search
(
...
@@ -463,7 +465,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -463,7 +465,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
import
tempfile
import
tempfile
import
os
import
os
import
gc
import
gc
temp_file_path
=
None
temp_file_path
=
None
try
:
try
:
# 过滤有效的PDF文件
# 过滤有效的PDF文件
...
@@ -471,64 +473,64 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -471,64 +473,64 @@ class BatchGetPodInfoWizard(models.TransientModel):
for
file_info
in
processed_files
:
for
file_info
in
processed_files
:
if
file_info
.
get
(
'bl'
)
and
file_info
.
get
(
'file_data'
):
if
file_info
.
get
(
'bl'
)
and
file_info
.
get
(
'file_data'
):
valid_files
.
append
(
file_info
)
valid_files
.
append
(
file_info
)
if
not
valid_files
:
if
not
valid_files
:
_logger
.
warning
(
"没有有效的PDF文件可以合并"
)
_logger
.
warning
(
"没有有效的PDF文件可以合并"
)
return
return
# 如果只有一个PDF文件,直接使用,不需要合并
# 如果只有一个PDF文件,直接使用,不需要合并
if
len
(
valid_files
)
==
1
:
if
len
(
valid_files
)
==
1
:
file_info
=
valid_files
[
0
]
file_info
=
valid_files
[
0
]
bl
=
file_info
[
'bl'
]
bl
=
file_info
[
'bl'
]
file_data
=
file_info
[
'file_data'
]
file_data
=
file_info
[
'file_data'
]
file_name
=
file_info
.
get
(
'file_name'
,
f
"{bl.bl_no}.pdf"
)
file_name
=
file_info
.
get
(
'file_name'
,
f
"{bl.bl_no}.pdf"
)
# 生成文件名(包含提单号和日期)
# 生成文件名(包含提单号和日期)
timestamp
=
datetime
.
now
()
.
strftime
(
'
%
Y
%
m
%
d_
%
H
%
M
%
S'
)
timestamp
=
datetime
.
now
()
.
strftime
(
'
%
Y
%
m
%
d_
%
H
%
M
%
S'
)
pdf_filename
=
f
"POD文件_{bl.bl_no}_{timestamp}.pdf"
pdf_filename
=
f
"POD文件_{bl.bl_no}_{timestamp}.pdf"
# 直接保存到字段
# 直接保存到字段
self
.
write
({
self
.
write
({
'pdf_file'
:
file_data
,
'pdf_file'
:
file_data
,
'pdf_filename'
:
pdf_filename
'pdf_filename'
:
pdf_filename
})
})
_logger
.
info
(
f
"单个PDF文件直接保存: {pdf_filename}"
)
_logger
.
info
(
f
"单个PDF文件直接保存: {pdf_filename}"
)
return
return
# 多个PDF文件需要合并
# 多个PDF文件需要合并
_logger
.
info
(
f
"开始合并 {len(valid_files)} 个PDF文件"
)
_logger
.
info
(
f
"开始合并 {len(valid_files)} 个PDF文件"
)
# 使用临时文件方式合并,避免内存占用过大
# 使用临时文件方式合并,避免内存占用过大
temp_file_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
temp_file_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
merged_pdf
=
fitz
.
open
()
merged_pdf
=
fitz
.
open
()
bl_numbers
=
[]
bl_numbers
=
[]
# 遍历所有处理后的PDF文件,分批处理以减少内存占用
# 遍历所有处理后的PDF文件,分批处理以减少内存占用
batch_size
=
5
# 每批处理5个PDF
batch_size
=
5
# 每批处理5个PDF
for
batch_start
in
range
(
0
,
len
(
valid_files
),
batch_size
):
for
batch_start
in
range
(
0
,
len
(
valid_files
),
batch_size
):
batch_files
=
valid_files
[
batch_start
:
batch_start
+
batch_size
]
batch_files
=
valid_files
[
batch_start
:
batch_start
+
batch_size
]
_logger
.
info
(
f
"处理第 {batch_start // batch_size + 1} 批,共 {len(batch_files)} 个PDF"
)
_logger
.
info
(
f
"处理第 {batch_start // batch_size + 1} 批,共 {len(batch_files)} 个PDF"
)
for
file_info
in
batch_files
:
for
file_info
in
batch_files
:
bl
=
file_info
[
'bl'
]
bl
=
file_info
[
'bl'
]
file_data
=
file_info
[
'file_data'
]
file_data
=
file_info
[
'file_data'
]
bl_numbers
.
append
(
bl
.
bl_no
)
bl_numbers
.
append
(
bl
.
bl_no
)
source_pdf
=
None
source_pdf
=
None
pdf_binary
=
None
pdf_binary
=
None
try
:
try
:
# 将base64数据转换为二进制
# 将base64数据转换为二进制
pdf_binary
=
base64
.
b64decode
(
file_data
)
pdf_binary
=
base64
.
b64decode
(
file_data
)
# 打开PDF文档
# 打开PDF文档
source_pdf
=
fitz
.
open
(
stream
=
pdf_binary
,
filetype
=
"pdf"
)
source_pdf
=
fitz
.
open
(
stream
=
pdf_binary
,
filetype
=
"pdf"
)
# 将源PDF的所有页面插入到合并的PDF中
# 将源PDF的所有页面插入到合并的PDF中
merged_pdf
.
insert_pdf
(
source_pdf
)
merged_pdf
.
insert_pdf
(
source_pdf
)
_logger
.
info
(
f
"已添加提单 {bl.bl_no} 的PDF到合并文档({len(source_pdf)} 页)"
)
_logger
.
info
(
f
"已添加提单 {bl.bl_no} 的PDF到合并文档({len(source_pdf)} 页)"
)
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"合并提单 {bl.bl_no} 的PDF失败: {str(e)}"
)
_logger
.
error
(
f
"合并提单 {bl.bl_no} 的PDF失败: {str(e)}"
)
continue
continue
...
@@ -539,7 +541,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -539,7 +541,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
source_pdf
=
None
source_pdf
=
None
pdf_binary
=
None
pdf_binary
=
None
gc
.
collect
()
# 强制垃圾回收
gc
.
collect
()
# 强制垃圾回收
# 每批处理完后,保存到临时文件并释放内存
# 每批处理完后,保存到临时文件并释放内存
if
batch_start
+
batch_size
<
len
(
valid_files
):
if
batch_start
+
batch_size
<
len
(
valid_files
):
# 保存当前合并结果到临时文件
# 保存当前合并结果到临时文件
...
@@ -548,7 +550,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -548,7 +550,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 重新打开临时文件继续合并
# 重新打开临时文件继续合并
merged_pdf
=
fitz
.
open
(
temp_file_path
)
merged_pdf
=
fitz
.
open
(
temp_file_path
)
gc
.
collect
()
gc
.
collect
()
# 如果有页面,保存合并后的PDF
# 如果有页面,保存合并后的PDF
if
len
(
merged_pdf
)
>
0
:
if
len
(
merged_pdf
)
>
0
:
# 使用临时文件保存,减少内存占用
# 使用临时文件保存,减少内存占用
...
@@ -556,39 +558,39 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -556,39 +558,39 @@ class BatchGetPodInfoWizard(models.TransientModel):
temp_file_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
temp_file_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
merged_pdf
.
save
(
temp_file_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
merged_pdf
.
save
(
temp_file_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
merged_pdf
.
close
()
merged_pdf
.
close
()
# 从临时文件读取并转换为base64
# 从临时文件读取并转换为base64
with
open
(
temp_file_path
,
'rb'
)
as
f
:
with
open
(
temp_file_path
,
'rb'
)
as
f
:
pdf_data
=
f
.
read
()
pdf_data
=
f
.
read
()
# 转换为base64
# 转换为base64
merged_pdf_base64
=
base64
.
b64encode
(
pdf_data
)
.
decode
(
'utf-8'
)
merged_pdf_base64
=
base64
.
b64encode
(
pdf_data
)
.
decode
(
'utf-8'
)
# 清理临时数据
# 清理临时数据
del
pdf_data
del
pdf_data
gc
.
collect
()
gc
.
collect
()
# 生成文件名(包含提单号和日期)
# 生成文件名(包含提单号和日期)
bl_numbers_str
=
'_'
.
join
(
bl_numbers
[:
5
])
# 最多显示5个提单号
bl_numbers_str
=
'_'
.
join
(
bl_numbers
[:
5
])
# 最多显示5个提单号
if
len
(
bl_numbers
)
>
5
:
if
len
(
bl_numbers
)
>
5
:
bl_numbers_str
+=
f
'_等{len(bl_numbers)}个'
bl_numbers_str
+=
f
'_等{len(bl_numbers)}个'
timestamp
=
datetime
.
now
()
.
strftime
(
'
%
Y
%
m
%
d_
%
H
%
M
%
S'
)
timestamp
=
datetime
.
now
()
.
strftime
(
'
%
Y
%
m
%
d_
%
H
%
M
%
S'
)
pdf_filename
=
f
"合并POD文件_{bl_numbers_str}_{timestamp}.pdf"
pdf_filename
=
f
"合并POD文件_{bl_numbers_str}_{timestamp}.pdf"
# 保存到字段
# 保存到字段
self
.
write
({
self
.
write
({
'pdf_file'
:
merged_pdf_base64
,
'pdf_file'
:
merged_pdf_base64
,
'pdf_filename'
:
pdf_filename
'pdf_filename'
:
pdf_filename
})
})
# 清理base64数据
# 清理base64数据
del
merged_pdf_base64
del
merged_pdf_base64
gc
.
collect
()
gc
.
collect
()
_logger
.
info
(
f
"成功合并 {len(bl_numbers)} 个PDF文件,文件名: {pdf_filename}"
)
_logger
.
info
(
f
"成功合并 {len(bl_numbers)} 个PDF文件,文件名: {pdf_filename}"
)
else
:
else
:
_logger
.
warning
(
"没有有效的PDF文件可以合并"
)
_logger
.
warning
(
"没有有效的PDF文件可以合并"
)
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"合并PDF文件失败: {str(e)}"
)
_logger
.
error
(
f
"合并PDF文件失败: {str(e)}"
)
finally
:
finally
:
...
@@ -634,16 +636,23 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -634,16 +636,23 @@ class BatchGetPodInfoWizard(models.TransientModel):
"""
"""
# return False#测试 先不同步
# return False#测试 先不同步
# 同步尾程POD信息
# 同步尾程POD信息
is_fail
=
[]
# 同步失败
for
file_info
in
processed_files
:
for
file_info
in
processed_files
:
if
not
file_info
[
'bl'
]:
if
not
file_info
[
'bl'
]:
continue
continue
bl
=
file_info
[
'bl'
]
bl
=
file_info
[
'bl'
]
# 查找清关文件并执行同步
# 查找清关文件并执行同步
clearance_file
=
file_info
.
get
(
'clearance_file'
)
clearance_file
=
file_info
.
get
(
'clearance_file'
)
if
clearance_file
:
if
clearance_file
:
clearance_file
.
action_sync
()
# 同步尾程POD
try
:
clearance_file
.
action_sync
()
# 同步尾程POD
except
Exception
as
e
:
logging
.
info
(
'_sync_last_mile_pod:
%
s'
%
e
)
is_fail
=
True
break
_logger
.
info
(
f
"Successfully synced POD for BL {bl.bl_no}"
)
_logger
.
info
(
f
"Successfully synced POD for BL {bl.bl_no}"
)
if
is_fail
:
raise
ValidationError
(
'本次同步失败,请重试!'
)
def
_check_target_texts_exist
(
self
,
pdf_binary
,
bl_no
):
def
_check_target_texts_exist
(
self
,
pdf_binary
,
bl_no
):
"""
"""
...
@@ -657,40 +666,41 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -657,40 +666,41 @@ class BatchGetPodInfoWizard(models.TransientModel):
import
numpy
as
np
import
numpy
as
np
from
PIL
import
Image
from
PIL
import
Image
import
re
import
re
# 定义目标文字(与_find_target_texts一致)
# 定义目标文字(与_find_target_texts一致)
TARGET_TEXTS
=
[
'AGN'
,
'ACN'
,
'UCLINK LOGISITICS LTD'
,
'UCLINK LOGISITICS'
,
'UCLINK'
,
'LOGISITICS'
,
'LOGISTICS'
,
'LTD'
,
TARGET_TEXTS
=
[
'AGN'
,
'ACN'
,
'UCLINK LOGISITICS LTD'
,
'UCLINK LOGISITICS'
,
'UCLINK'
,
'LOGISITICS'
,
'LOGISTICS'
,
'LTD'
,
'UCLINKLOGISITICSLTD'
]
'UCLINKLOGISITICSLTD'
]
EXCLUDE_TEXTS
=
[
'AIR EQK'
,
'ARN'
,
'EQK'
,
'AIR'
,
'Page 1 of 1'
,
'Page 2 of 2'
,
'Page 3 of 3'
,
'Page 4 of 4'
,
EXCLUDE_TEXTS
=
[
'AIR EQK'
,
'ARN'
,
'EQK'
,
'AIR'
,
'Page 1 of 1'
,
'Page 2 of 2'
,
'Page 3 of 3'
,
'Page 4 of 4'
,
'Page 5 of 5'
]
'Page 5 of 5'
]
try
:
try
:
# 设置Tesseract路径
# 设置Tesseract路径
self
.
_setup_tesseract_path
()
self
.
_setup_tesseract_path
()
# 打开PDF文档
# 打开PDF文档
pdf_document
=
fitz
.
open
(
stream
=
pdf_binary
,
filetype
=
"pdf"
)
pdf_document
=
fitz
.
open
(
stream
=
pdf_binary
,
filetype
=
"pdf"
)
found_texts
=
[]
found_texts
=
[]
# 尝试导入OpenCV,如果失败则使用PIL替代
# 尝试导入OpenCV,如果失败则使用PIL替代
try
:
try
:
import
cv2
import
cv2
cv2_available
=
True
cv2_available
=
True
except
ImportError
:
except
ImportError
:
cv2_available
=
False
cv2_available
=
False
# 遍历每一页
# 遍历每一页
for
page_num
in
range
(
len
(
pdf_document
)):
for
page_num
in
range
(
len
(
pdf_document
)):
page
=
pdf_document
[
page_num
]
page
=
pdf_document
[
page_num
]
# 首先尝试从PDF文本层提取(如果是文本型PDF)
# 首先尝试从PDF文本层提取(如果是文本型PDF)
page_text_pdf
=
page
.
get_text
()
.
upper
()
page_text_pdf
=
page
.
get_text
()
.
upper
()
# 将页面转换为图像进行OCR识别
# 将页面转换为图像进行OCR识别
mat
=
fitz
.
Matrix
(
3.0
,
3.0
)
# 进一步提高分辨率,从2.0提升到3.0
mat
=
fitz
.
Matrix
(
3.0
,
3.0
)
# 进一步提高分辨率,从2.0提升到3.0
pix
=
page
.
get_pixmap
(
matrix
=
mat
)
pix
=
page
.
get_pixmap
(
matrix
=
mat
)
img_data
=
pix
.
tobytes
(
"png"
)
img_data
=
pix
.
tobytes
(
"png"
)
# 转换为PIL图像
# 转换为PIL图像
if
cv2_available
:
if
cv2_available
:
nparr
=
np
.
frombuffer
(
img_data
,
np
.
uint8
)
nparr
=
np
.
frombuffer
(
img_data
,
np
.
uint8
)
...
@@ -700,7 +710,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -700,7 +710,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
pil_img
=
Image
.
open
(
io
.
BytesIO
(
img_data
))
pil_img
=
Image
.
open
(
io
.
BytesIO
(
img_data
))
if
pil_img
.
mode
!=
'RGB'
:
if
pil_img
.
mode
!=
'RGB'
:
pil_img
=
pil_img
.
convert
(
'RGB'
)
pil_img
=
pil_img
.
convert
(
'RGB'
)
# OCR识别
# OCR识别
try
:
try
:
config
=
'--psm 6 --oem 1 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,- -c preserve_interword_spaces=1'
config
=
'--psm 6 --oem 1 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,- -c preserve_interword_spaces=1'
...
@@ -708,14 +718,14 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -708,14 +718,14 @@ class BatchGetPodInfoWizard(models.TransientModel):
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
warning
(
f
"OCR识别失败,第{page_num + 1}页,使用PDF文本: {str(e)}"
)
_logger
.
warning
(
f
"OCR识别失败,第{page_num + 1}页,使用PDF文本: {str(e)}"
)
ocr_text
=
page_text_pdf
ocr_text
=
page_text_pdf
# 合并PDF文本和OCR文本进行检查
# 合并PDF文本和OCR文本进行检查
combined_text
=
(
page_text_pdf
+
' '
+
ocr_text
)
.
upper
()
combined_text
=
(
page_text_pdf
+
' '
+
ocr_text
)
.
upper
()
# 检查目标文字
# 检查目标文字
for
target_text
in
TARGET_TEXTS
:
for
target_text
in
TARGET_TEXTS
:
target_upper
=
target_text
.
upper
()
target_upper
=
target_text
.
upper
()
# 检查是否包含目标文字
# 检查是否包含目标文字
is_match
=
False
is_match
=
False
if
target_text
==
'AGN'
:
if
target_text
==
'AGN'
:
...
@@ -736,7 +746,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -736,7 +746,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 排除AIR、EQK、ARN等(需要这些词都不存在)
# 排除AIR、EQK、ARN等(需要这些词都不存在)
if
'AIR EQK'
not
in
combined_text
and
'ARN'
not
in
combined_text
:
if
'AIR EQK'
not
in
combined_text
and
'ARN'
not
in
combined_text
:
is_match
=
True
is_match
=
True
# 如果匹配,检查是否在排除列表中
# 如果匹配,检查是否在排除列表中
if
is_match
:
if
is_match
:
is_excluded
=
False
is_excluded
=
False
...
@@ -744,27 +754,28 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -744,27 +754,28 @@ class BatchGetPodInfoWizard(models.TransientModel):
exclude_upper
=
exclude_text
.
upper
()
exclude_upper
=
exclude_text
.
upper
()
if
exclude_upper
in
combined_text
and
target_upper
in
combined_text
:
if
exclude_upper
in
combined_text
and
target_upper
in
combined_text
:
# 检查是否是页码
# 检查是否是页码
if
re
.
search
(
r'PAGE\s+\d+\s+OF\s+\d+'
,
combined_text
)
or
re
.
search
(
r'\d+\s*/\s*\d+'
,
combined_text
):
if
re
.
search
(
r'PAGE\s+\d+\s+OF\s+\d+'
,
combined_text
)
or
re
.
search
(
r'\d+\s*/\s*\d+'
,
combined_text
):
is_excluded
=
True
is_excluded
=
True
break
break
# 检查是否是AIR EQK等排除项
# 检查是否是AIR EQK等排除项
if
'AIR EQK'
in
combined_text
or
'ARN'
in
combined_text
:
if
'AIR EQK'
in
combined_text
or
'ARN'
in
combined_text
:
is_excluded
=
True
is_excluded
=
True
break
break
if
not
is_excluded
:
if
not
is_excluded
:
found_texts
.
append
(
f
"第{page_num + 1}页: {target_text}"
)
found_texts
.
append
(
f
"第{page_num + 1}页: {target_text}"
)
break
# 找到就跳出,避免重复
break
# 找到就跳出,避免重复
pdf_document
.
close
()
pdf_document
.
close
()
if
found_texts
:
if
found_texts
:
_logger
.
warning
(
f
"提单 {bl_no} 仍存在目标文字: {', '.join(found_texts)}"
)
_logger
.
warning
(
f
"提单 {bl_no} 仍存在目标文字: {', '.join(found_texts)}"
)
return
True
,
found_texts
return
True
,
found_texts
else
:
else
:
_logger
.
info
(
f
"提单 {bl_no} 未发现目标文字"
)
_logger
.
info
(
f
"提单 {bl_no} 未发现目标文字"
)
return
False
,
[]
return
False
,
[]
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"检查目标文字失败,提单号: {bl_no}, 错误: {str(e)}"
)
_logger
.
error
(
f
"检查目标文字失败,提单号: {bl_no}, 错误: {str(e)}"
)
# 检查失败时,假设不存在(避免误报)
# 检查失败时,假设不存在(避免误报)
...
@@ -810,11 +821,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -810,11 +821,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
)
)
if
ai_processed_pdf
:
if
ai_processed_pdf
:
processed_file_data
=
base64
.
b64encode
(
ai_processed_pdf
)
.
decode
(
'utf-8'
)
processed_file_data
=
base64
.
b64encode
(
ai_processed_pdf
)
.
decode
(
'utf-8'
)
# 检查是否还存在目标文字
# 检查是否还存在目标文字
final_check_pdf
=
base64
.
b64decode
(
processed_file_data
)
final_check_pdf
=
base64
.
b64decode
(
processed_file_data
)
text_still_exists
,
final_found_texts
=
self
.
_check_target_texts_exist
(
final_check_pdf
,
bl
.
bl_no
)
text_still_exists
,
final_found_texts
=
self
.
_check_target_texts_exist
(
final_check_pdf
,
bl
.
bl_no
)
if
text_still_exists
:
if
text_still_exists
:
error_msg
=
f
"提单 {bl.bl_no} 经过AI处理后仍存在目标文字: {', '.join(final_found_texts)},请取消该提单操作,手动处理"
error_msg
=
f
"提单 {bl.bl_no} 经过AI处理后仍存在目标文字: {', '.join(final_found_texts)},请取消该提单操作,手动处理"
_logger
.
error
(
error_msg
)
_logger
.
error
(
error_msg
)
...
@@ -860,11 +872,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -860,11 +872,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
)
)
if
ai_processed_pdf
:
if
ai_processed_pdf
:
processed_file_data
=
base64
.
b64encode
(
ai_processed_pdf
)
.
decode
(
'utf-8'
)
processed_file_data
=
base64
.
b64encode
(
ai_processed_pdf
)
.
decode
(
'utf-8'
)
# 第四步:再次检查是否还存在目标文字
# 第四步:再次检查是否还存在目标文字
final_check_pdf
=
base64
.
b64decode
(
processed_file_data
)
final_check_pdf
=
base64
.
b64decode
(
processed_file_data
)
text_still_exists
,
final_found_texts
=
self
.
_check_target_texts_exist
(
final_check_pdf
,
bl
.
bl_no
)
text_still_exists
,
final_found_texts
=
self
.
_check_target_texts_exist
(
final_check_pdf
,
bl
.
bl_no
)
if
text_still_exists
:
if
text_still_exists
:
# 第五步:如果仍然存在,记录错误信息并停止处理
# 第五步:如果仍然存在,记录错误信息并停止处理
error_msg
=
f
"提单 {bl.bl_no} 经过系统处理后仍存在目标文字: {', '.join(final_found_texts)},请取消该提单操作,手动处理"
error_msg
=
f
"提单 {bl.bl_no} 经过系统处理后仍存在目标文字: {', '.join(final_found_texts)},请取消该提单操作,手动处理"
...
@@ -877,7 +890,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -877,7 +890,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
_logger
.
warning
(
f
"提单 {bl.bl_no} AI处理失败,检查OCR处理结果"
)
_logger
.
warning
(
f
"提单 {bl.bl_no} AI处理失败,检查OCR处理结果"
)
# AI处理失败,检查OCR结果是否真的清除了目标文字
# AI处理失败,检查OCR结果是否真的清除了目标文字
ocr_check_pdf
=
base64
.
b64decode
(
processed_file_data
)
ocr_check_pdf
=
base64
.
b64decode
(
processed_file_data
)
text_still_exists
,
ocr_found_texts
=
self
.
_check_target_texts_exist
(
ocr_check_pdf
,
bl
.
bl_no
)
text_still_exists
,
ocr_found_texts
=
self
.
_check_target_texts_exist
(
ocr_check_pdf
,
bl
.
bl_no
)
if
text_still_exists
:
if
text_still_exists
:
error_msg
=
f
"提单 {bl.bl_no} 经过系统处理后仍存在目标文字: {', '.join(ocr_found_texts)},请取消该提单操作,手动处理"
error_msg
=
f
"提单 {bl.bl_no} 经过系统处理后仍存在目标文字: {', '.join(ocr_found_texts)},请取消该提单操作,手动处理"
error_messages
.
append
(
error_msg
)
error_messages
.
append
(
error_msg
)
...
@@ -890,7 +904,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -890,7 +904,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
_logger
.
error
(
f
"提单 {bl.bl_no} AI处理异常: {str(e)}"
)
_logger
.
error
(
f
"提单 {bl.bl_no} AI处理异常: {str(e)}"
)
# AI处理失败,使用OCR结果,但需要检查
# AI处理失败,使用OCR结果,但需要检查
final_check_pdf
=
base64
.
b64decode
(
processed_file_data
)
final_check_pdf
=
base64
.
b64decode
(
processed_file_data
)
text_still_exists
,
final_found_texts
=
self
.
_check_target_texts_exist
(
final_check_pdf
,
bl
.
bl_no
)
text_still_exists
,
final_found_texts
=
self
.
_check_target_texts_exist
(
final_check_pdf
,
bl
.
bl_no
)
if
text_still_exists
:
if
text_still_exists
:
error_msg
=
f
"提单 {bl.bl_no} 经过系统处理后仍存在目标文字: {', '.join(final_found_texts)},请取消该提单操作,手动处理"
error_msg
=
f
"提单 {bl.bl_no} 经过系统处理后仍存在目标文字: {', '.join(final_found_texts)},请取消该提单操作,手动处理"
error_messages
.
append
(
error_msg
)
error_messages
.
append
(
error_msg
)
...
@@ -991,26 +1006,25 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -991,26 +1006,25 @@ class BatchGetPodInfoWizard(models.TransientModel):
"""
"""
import
fitz
# PyMuPDF
import
fitz
# PyMuPDF
import
base64
import
base64
import
mimetypes
import
gc
import
gc
import
os
import
os
import
tempfile
import
tempfile
from
PIL
import
Image
from
PIL
import
Image
import
time
import
time
start_time
=
time
.
time
()
start_time
=
time
.
time
()
_logger
.
info
(
f
"开始使用AI图片编辑处理PDF,提单号: {bl_no}"
)
_logger
.
info
(
f
"开始使用AI图片编辑处理PDF,提单号: {bl_no}"
)
# 初始化AI服务
# 初始化AI服务
ai_service
=
AIImageEditService
()
ai_service
=
AIImageEditService
()
# 打开PDF文档
# 打开PDF文档
pdf_document
=
fitz
.
open
(
stream
=
pdf_data
,
filetype
=
"pdf"
)
pdf_document
=
fitz
.
open
(
stream
=
pdf_data
,
filetype
=
"pdf"
)
total_pages
=
len
(
pdf_document
)
total_pages
=
len
(
pdf_document
)
total_ai_time
=
0.0
# 累计AI总耗时
total_ai_time
=
0.0
# 累计AI总耗时
_logger
.
info
(
f
"PDF总页数: {total_pages}"
)
_logger
.
info
(
f
"PDF总页数: {total_pages}"
)
# 对于多页PDF,使用临时文件方式减少内存占用
# 对于多页PDF,使用临时文件方式减少内存占用
use_temp_file
=
total_pages
>
5
# 超过5页使用临时文件
use_temp_file
=
total_pages
>
5
# 超过5页使用临时文件
temp_file_path
=
None
temp_file_path
=
None
...
@@ -1018,16 +1032,16 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1018,16 +1032,16 @@ class BatchGetPodInfoWizard(models.TransientModel):
import
tempfile
import
tempfile
temp_file_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
temp_file_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
_logger
.
info
(
f
"使用临时文件方式处理,减少内存占用: {temp_file_path}"
)
_logger
.
info
(
f
"使用临时文件方式处理,减少内存占用: {temp_file_path}"
)
processed_images
=
[]
# 存储处理后的PIL图片对象(分批处理)
processed_images
=
[]
# 存储处理后的PIL图片对象(分批处理)
batch_size
=
5
# 每批处理5页图片
batch_size
=
5
# 每批处理5页图片
# 遍历每一页(按照image-to-coordinate.py的逻辑)
# 遍历每一页(按照image-to-coordinate.py的逻辑)
for
page_num
in
range
(
total_pages
):
for
page_num
in
range
(
total_pages
):
page_start_time
=
time
.
time
()
page_start_time
=
time
.
time
()
page
=
pdf_document
[
page_num
]
page
=
pdf_document
[
page_num
]
_logger
.
info
(
f
"正在处理第{page_num + 1}页"
)
_logger
.
info
(
f
"正在处理第{page_num + 1}页"
)
# 将页面转换为图像(按照image-to-coordinate.py的pdf_to_images函数,使用dpi=150)
# 将页面转换为图像(按照image-to-coordinate.py的pdf_to_images函数,使用dpi=150)
# 对于内存优化,使用稍低的分辨率(120 DPI)以避免内存问题
# 对于内存优化,使用稍低的分辨率(120 DPI)以避免内存问题
dpi
=
120
dpi
=
120
...
@@ -1035,18 +1049,18 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1035,18 +1049,18 @@ class BatchGetPodInfoWizard(models.TransientModel):
pix
=
None
pix
=
None
img
=
None
img
=
None
img_bytes_io
=
None
img_bytes_io
=
None
try
:
try
:
pix
=
page
.
get_pixmap
(
matrix
=
mat
)
pix
=
page
.
get_pixmap
(
matrix
=
mat
)
# 将pixmap转换为PIL Image对象
# 将pixmap转换为PIL Image对象
img_data
=
pix
.
tobytes
(
"png"
)
img_data
=
pix
.
tobytes
(
"png"
)
del
pix
# 立即释放pixmap以节省内存
del
pix
# 立即释放pixmap以节省内存
pix
=
None
pix
=
None
gc
.
collect
()
# 强制垃圾回收
gc
.
collect
()
# 强制垃圾回收
img
=
Image
.
open
(
io
.
BytesIO
(
img_data
))
img
=
Image
.
open
(
io
.
BytesIO
(
img_data
))
# 获取图片尺寸(按照image-to-coordinate.py的逻辑)
# 获取图片尺寸(按照image-to-coordinate.py的逻辑)
img_w
,
img_h
=
img
.
size
img_w
,
img_h
=
img
.
size
_logger
.
info
(
f
"第{page_num + 1}页页面尺寸: {img_w}x{img_h} 像素"
)
_logger
.
info
(
f
"第{page_num + 1}页页面尺寸: {img_w}x{img_h} 像素"
)
...
@@ -1060,12 +1074,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1060,12 +1074,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
img_bytes_io
=
None
img_bytes_io
=
None
del
img_data
# 释放图片数据
del
img_data
# 释放图片数据
gc
.
collect
()
# 强制垃圾回收
gc
.
collect
()
# 强制垃圾回收
# 使用AI编辑图片,移除指定文字(带重试机制)
# 使用AI编辑图片,移除指定文字(带重试机制)
edited_img_base64
=
None
edited_img_base64
=
None
ai_processing_time
=
0.0
ai_processing_time
=
0.0
max_retries
=
2
# 最多尝试2次(首次+1次重试)
max_retries
=
2
# 最多尝试2次(首次+1次重试)
for
attempt
in
range
(
1
,
max_retries
+
1
):
for
attempt
in
range
(
1
,
max_retries
+
1
):
ai_start_time
=
time
.
time
()
ai_start_time
=
time
.
time
()
try
:
try
:
...
@@ -1078,30 +1092,33 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1078,30 +1092,33 @@ class BatchGetPodInfoWizard(models.TransientModel):
attempt_time
=
ai_end_time
-
ai_start_time
attempt_time
=
ai_end_time
-
ai_start_time
ai_processing_time
+=
attempt_time
# 累计AI耗时
ai_processing_time
+=
attempt_time
# 累计AI耗时
total_ai_time
+=
attempt_time
# 累计总AI耗时
total_ai_time
+=
attempt_time
# 累计总AI耗时
if
edited_img_base64_raw
:
if
edited_img_base64_raw
:
edited_img_base64
=
edited_img_base64_raw
edited_img_base64
=
edited_img_base64_raw
_logger
.
info
(
f
"第{page_num + 1}页AI处理成功(第{attempt}次尝试),耗时: {attempt_time:.2f}秒"
)
_logger
.
info
(
f
"第{page_num + 1}页AI处理成功(第{attempt}次尝试),耗时: {attempt_time:.2f}秒"
)
break
break
else
:
else
:
if
attempt
<
max_retries
:
if
attempt
<
max_retries
:
_logger
.
warning
(
f
"第{page_num + 1}页AI处理失败(第{attempt}次尝试),将重试,耗时: {attempt_time:.2f}秒"
)
_logger
.
warning
(
f
"第{page_num + 1}页AI处理失败(第{attempt}次尝试),将重试,耗时: {attempt_time:.2f}秒"
)
else
:
else
:
_logger
.
warning
(
f
"第{page_num + 1}页AI处理失败(第{attempt}次尝试,已用尽重试),耗时: {attempt_time:.2f}秒"
)
_logger
.
warning
(
f
"第{page_num + 1}页AI处理失败(第{attempt}次尝试,已用尽重试),耗时: {attempt_time:.2f}秒"
)
except
Exception
as
e
:
except
Exception
as
e
:
ai_end_time
=
time
.
time
()
ai_end_time
=
time
.
time
()
attempt_time
=
ai_end_time
-
ai_start_time
attempt_time
=
ai_end_time
-
ai_start_time
ai_processing_time
+=
attempt_time
ai_processing_time
+=
attempt_time
total_ai_time
+=
attempt_time
total_ai_time
+=
attempt_time
_logger
.
error
(
f
"第{page_num + 1}页AI处理异常(第{attempt}次尝试): {str(e)},耗时: {attempt_time:.2f}秒"
)
_logger
.
error
(
f
"第{page_num + 1}页AI处理异常(第{attempt}次尝试): {str(e)},耗时: {attempt_time:.2f}秒"
)
if
attempt
<
max_retries
:
if
attempt
<
max_retries
:
_logger
.
info
(
f
"第{page_num + 1}页将进行第{attempt + 1}次重试"
)
_logger
.
info
(
f
"第{page_num + 1}页将进行第{attempt + 1}次重试"
)
edited_img_base64
=
None
edited_img_base64
=
None
# 释放encoded_string以节省内存
# 释放encoded_string以节省内存
del
encoded_string
del
encoded_string
gc
.
collect
()
gc
.
collect
()
if
edited_img_base64
:
if
edited_img_base64
:
# 解码base64图片数据并转换为PIL Image对象(按照image-to-coordinate.py的逻辑)
# 解码base64图片数据并转换为PIL Image对象(按照image-to-coordinate.py的逻辑)
edited_img_data
=
base64
.
b64decode
(
edited_img_base64
)
edited_img_data
=
base64
.
b64decode
(
edited_img_base64
)
...
@@ -1111,17 +1128,18 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1111,17 +1128,18 @@ class BatchGetPodInfoWizard(models.TransientModel):
processed_images
.
append
(
edited_img
)
processed_images
.
append
(
edited_img
)
_logger
.
info
(
f
"第{page_num + 1}页AI处理最终成功,总耗时: {ai_processing_time:.2f}秒"
)
_logger
.
info
(
f
"第{page_num + 1}页AI处理最终成功,总耗时: {ai_processing_time:.2f}秒"
)
else
:
else
:
_logger
.
warning
(
f
"第{page_num + 1}页AI处理最终失败(已重试),使用原始页面,总耗时: {ai_processing_time:.2f}秒"
)
_logger
.
warning
(
f
"第{page_num + 1}页AI处理最终失败(已重试),使用原始页面,总耗时: {ai_processing_time:.2f}秒"
)
# 如果AI处理失败,使用原始图片
# 如果AI处理失败,使用原始图片
processed_images
.
append
(
img
.
convert
(
'RGB'
))
processed_images
.
append
(
img
.
convert
(
'RGB'
))
# 释放原始图片对象
# 释放原始图片对象
if
img
:
if
img
:
img
.
close
()
img
.
close
()
del
img
del
img
img
=
None
img
=
None
gc
.
collect
()
# 强制垃圾回收
gc
.
collect
()
# 强制垃圾回收
# 分批处理:每处理batch_size页,就转换为PDF并保存到临时文件
# 分批处理:每处理batch_size页,就转换为PDF并保存到临时文件
if
use_temp_file
and
len
(
processed_images
)
>=
batch_size
:
if
use_temp_file
and
len
(
processed_images
)
>=
batch_size
:
_logger
.
info
(
f
"达到批次大小 {batch_size},开始保存到临时文件"
)
_logger
.
info
(
f
"达到批次大小 {batch_size},开始保存到临时文件"
)
...
@@ -1134,33 +1152,33 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1134,33 +1152,33 @@ class BatchGetPodInfoWizard(models.TransientModel):
batch_buffer
.
seek
(
0
)
batch_buffer
.
seek
(
0
)
pdf_bytes
=
batch_buffer
.
getvalue
()
pdf_bytes
=
batch_buffer
.
getvalue
()
batch_buffer
.
close
()
batch_buffer
.
close
()
# 释放已处理的图片
# 释放已处理的图片
for
img_obj
in
processed_images
:
for
img_obj
in
processed_images
:
if
img_obj
:
if
img_obj
:
img_obj
.
close
()
img_obj
.
close
()
processed_images
=
[]
processed_images
=
[]
gc
.
collect
()
gc
.
collect
()
if
os
.
path
.
exists
(
temp_file_path
)
and
os
.
path
.
getsize
(
temp_file_path
)
>
0
:
if
os
.
path
.
exists
(
temp_file_path
)
and
os
.
path
.
getsize
(
temp_file_path
)
>
0
:
# 追加到现有PDF:先读取现有内容,合并后保存到新文件,再替换
# 追加到现有PDF:先读取现有内容,合并后保存到新文件,再替换
with
open
(
temp_file_path
,
'rb'
)
as
f
:
with
open
(
temp_file_path
,
'rb'
)
as
f
:
existing_bytes
=
f
.
read
()
existing_bytes
=
f
.
read
()
existing_pdf
=
fitz
.
open
(
stream
=
existing_bytes
,
filetype
=
"pdf"
)
existing_pdf
=
fitz
.
open
(
stream
=
existing_bytes
,
filetype
=
"pdf"
)
new_pdf
=
fitz
.
open
(
stream
=
pdf_bytes
,
filetype
=
"pdf"
)
new_pdf
=
fitz
.
open
(
stream
=
pdf_bytes
,
filetype
=
"pdf"
)
existing_pdf
.
insert_pdf
(
new_pdf
)
existing_pdf
.
insert_pdf
(
new_pdf
)
new_pdf
.
close
()
new_pdf
.
close
()
# 保存到新临时文件,避免"save to original must be incremental"错误
# 保存到新临时文件,避免"save to original must be incremental"错误
new_temp_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
new_temp_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
existing_pdf
.
save
(
new_temp_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
existing_pdf
.
save
(
new_temp_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
existing_pdf
.
close
()
existing_pdf
.
close
()
# 替换旧文件
# 替换旧文件
os
.
remove
(
temp_file_path
)
os
.
remove
(
temp_file_path
)
os
.
rename
(
new_temp_path
,
temp_file_path
)
os
.
rename
(
new_temp_path
,
temp_file_path
)
# 释放资源
# 释放资源
del
existing_bytes
del
existing_bytes
del
pdf_bytes
del
pdf_bytes
...
@@ -1180,7 +1198,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1180,7 +1198,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
img_obj
.
close
()
img_obj
.
close
()
processed_images
=
[]
processed_images
=
[]
gc
.
collect
()
gc
.
collect
()
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"第{page_num + 1}页处理异常: {str(e)}"
)
_logger
.
error
(
f
"第{page_num + 1}页处理异常: {str(e)}"
)
# 确保资源被释放
# 确保资源被释放
...
@@ -1194,18 +1212,18 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1194,18 +1212,18 @@ class BatchGetPodInfoWizard(models.TransientModel):
gc
.
collect
()
gc
.
collect
()
# 如果处理失败,跳过这一页或使用原始页面
# 如果处理失败,跳过这一页或使用原始页面
continue
continue
page_end_time
=
time
.
time
()
page_end_time
=
time
.
time
()
page_processing_time
=
page_end_time
-
page_start_time
page_processing_time
=
page_end_time
-
page_start_time
_logger
.
info
(
f
"第{page_num + 1}页总处理时间: {page_processing_time:.2f}秒"
)
_logger
.
info
(
f
"第{page_num + 1}页总处理时间: {page_processing_time:.2f}秒"
)
pdf_document
.
close
()
pdf_document
.
close
()
# 将处理后的图片转换为PDF(按照image-to-coordinate.py的images_to_pdf函数逻辑)
# 将处理后的图片转换为PDF(按照image-to-coordinate.py的images_to_pdf函数逻辑)
pdf_creation_start
=
time
.
time
()
pdf_creation_start
=
time
.
time
()
result_data
=
None
result_data
=
None
import
os
import
os
try
:
try
:
if
use_temp_file
and
temp_file_path
:
if
use_temp_file
and
temp_file_path
:
# 如果还有剩余的图片,追加到临时文件
# 如果还有剩余的图片,追加到临时文件
...
@@ -1220,35 +1238,35 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1220,35 +1238,35 @@ class BatchGetPodInfoWizard(models.TransientModel):
batch_buffer
.
seek
(
0
)
batch_buffer
.
seek
(
0
)
temp_pdf_bytes
=
batch_buffer
.
getvalue
()
temp_pdf_bytes
=
batch_buffer
.
getvalue
()
batch_buffer
.
close
()
batch_buffer
.
close
()
# 释放图片
# 释放图片
for
img_obj
in
processed_images
:
for
img_obj
in
processed_images
:
if
img_obj
:
if
img_obj
:
img_obj
.
close
()
img_obj
.
close
()
processed_images
=
None
processed_images
=
None
gc
.
collect
()
gc
.
collect
()
# 追加到临时文件
# 追加到临时文件
if
os
.
path
.
exists
(
temp_file_path
)
and
os
.
path
.
getsize
(
temp_file_path
)
>
0
:
if
os
.
path
.
exists
(
temp_file_path
)
and
os
.
path
.
getsize
(
temp_file_path
)
>
0
:
# 如果临时文件已存在,先读取内容
# 如果临时文件已存在,先读取内容
with
open
(
temp_file_path
,
'rb'
)
as
f
:
with
open
(
temp_file_path
,
'rb'
)
as
f
:
existing_pdf_bytes
=
f
.
read
()
existing_pdf_bytes
=
f
.
read
()
# 合并PDF:打开现有PDF和新PDF,然后合并
# 合并PDF:打开现有PDF和新PDF,然后合并
existing_pdf
=
fitz
.
open
(
stream
=
existing_pdf_bytes
,
filetype
=
"pdf"
)
existing_pdf
=
fitz
.
open
(
stream
=
existing_pdf_bytes
,
filetype
=
"pdf"
)
new_pdf
=
fitz
.
open
(
stream
=
temp_pdf_bytes
,
filetype
=
"pdf"
)
new_pdf
=
fitz
.
open
(
stream
=
temp_pdf_bytes
,
filetype
=
"pdf"
)
existing_pdf
.
insert_pdf
(
new_pdf
)
existing_pdf
.
insert_pdf
(
new_pdf
)
new_pdf
.
close
()
new_pdf
.
close
()
# 保存到新的临时文件,避免"save to original must be incremental"错误
# 保存到新的临时文件,避免"save to original must be incremental"错误
new_temp_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
new_temp_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
existing_pdf
.
save
(
new_temp_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
existing_pdf
.
save
(
new_temp_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
existing_pdf
.
close
()
existing_pdf
.
close
()
# 删除旧临时文件,重命名新文件
# 删除旧临时文件,重命名新文件
os
.
remove
(
temp_file_path
)
os
.
remove
(
temp_file_path
)
os
.
rename
(
new_temp_path
,
temp_file_path
)
os
.
rename
(
new_temp_path
,
temp_file_path
)
# 释放资源
# 释放资源
del
existing_pdf_bytes
del
existing_pdf_bytes
del
temp_pdf_bytes
del
temp_pdf_bytes
...
@@ -1264,12 +1282,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1264,12 +1282,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 注意:processed_images 在这里已经被释放了,需要重新获取
# 注意:processed_images 在这里已经被释放了,需要重新获取
# 如果还有剩余图片,需要重新处理(这种情况不应该发生,因为前面已经释放了)
# 如果还有剩余图片,需要重新处理(这种情况不应该发生,因为前面已经释放了)
_logger
.
warning
(
"追加剩余图片失败,剩余图片已在之前释放"
)
_logger
.
warning
(
"追加剩余图片失败,剩余图片已在之前释放"
)
# 从临时文件读取最终结果
# 从临时文件读取最终结果
if
os
.
path
.
exists
(
temp_file_path
):
if
os
.
path
.
exists
(
temp_file_path
):
with
open
(
temp_file_path
,
'rb'
)
as
f
:
with
open
(
temp_file_path
,
'rb'
)
as
f
:
result_data
=
f
.
read
()
result_data
=
f
.
read
()
# 删除临时文件
# 删除临时文件
try
:
try
:
os
.
remove
(
temp_file_path
)
os
.
remove
(
temp_file_path
)
...
@@ -1279,7 +1297,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1279,7 +1297,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
else
:
else
:
_logger
.
error
(
"临时文件不存在,无法读取结果"
)
_logger
.
error
(
"临时文件不存在,无法读取结果"
)
return
None
return
None
elif
processed_images
:
elif
processed_images
:
# 使用内存方式处理(5页以内)
# 使用内存方式处理(5页以内)
output_buffer
=
io
.
BytesIO
()
output_buffer
=
io
.
BytesIO
()
...
@@ -1289,10 +1307,10 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1289,10 +1307,10 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 即使rest是空列表,也直接传入(PIL会正确处理)
# 即使rest是空列表,也直接传入(PIL会正确处理)
first
.
save
(
output_buffer
,
format
=
'PDF'
,
save_all
=
True
,
append_images
=
rest
)
first
.
save
(
output_buffer
,
format
=
'PDF'
,
save_all
=
True
,
append_images
=
rest
)
output_buffer
.
seek
(
0
)
output_buffer
.
seek
(
0
)
result_data
=
output_buffer
.
getvalue
()
result_data
=
output_buffer
.
getvalue
()
output_buffer
.
close
()
output_buffer
.
close
()
# 释放所有图片对象
# 释放所有图片对象
for
img_obj
in
processed_images
:
for
img_obj
in
processed_images
:
if
img_obj
:
if
img_obj
:
...
@@ -1303,9 +1321,9 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1303,9 +1321,9 @@ class BatchGetPodInfoWizard(models.TransientModel):
else
:
else
:
_logger
.
error
(
"没有需要写入PDF的图片"
)
_logger
.
error
(
"没有需要写入PDF的图片"
)
return
None
return
None
gc
.
collect
()
# 强制垃圾回收
gc
.
collect
()
# 强制垃圾回收
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"PDF创建失败: {str(e)}"
)
_logger
.
error
(
f
"PDF创建失败: {str(e)}"
)
# 确保资源被释放
# 确保资源被释放
...
@@ -1320,18 +1338,19 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1320,18 +1338,19 @@ class BatchGetPodInfoWizard(models.TransientModel):
pass
pass
gc
.
collect
()
gc
.
collect
()
return
None
return
None
pdf_creation_end
=
time
.
time
()
pdf_creation_end
=
time
.
time
()
total_time
=
time
.
time
()
-
start_time
total_time
=
time
.
time
()
-
start_time
pdf_creation_time
=
pdf_creation_end
-
pdf_creation_start
pdf_creation_time
=
pdf_creation_end
-
pdf_creation_start
_logger
.
info
(
f
"AI图片编辑PDF处理完成,提单号: {bl_no}"
)
_logger
.
info
(
f
"AI图片编辑PDF处理完成,提单号: {bl_no}"
)
_logger
.
info
(
f
"总处理时间: {total_time:.2f}秒"
)
_logger
.
info
(
f
"总处理时间: {total_time:.2f}秒"
)
_logger
.
info
(
f
"AI总耗时: {total_ai_time:.2f}秒(累计所有页面的AI处理时间)"
)
_logger
.
info
(
f
"AI总耗时: {total_ai_time:.2f}秒(累计所有页面的AI处理时间)"
)
_logger
.
info
(
f
"PDF创建时间: {pdf_creation_time:.2f}秒"
)
_logger
.
info
(
f
"PDF创建时间: {pdf_creation_time:.2f}秒"
)
_logger
.
info
(
f
"平均每页AI处理时间: {total_ai_time/total_pages:.2f}秒"
if
total_pages
>
0
else
"平均每页AI处理时间: 0.00秒"
)
_logger
.
info
(
f
"平均每页AI处理时间: {total_ai_time / total_pages:.2f}秒"
if
total_pages
>
0
else
"平均每页AI处理时间: 0.00秒"
)
return
result_data
return
result_data
def
_process_pdf_with_ocr
(
self
,
pdf_data
,
bl_no
,
debug_mode
=
False
):
def
_process_pdf_with_ocr
(
self
,
pdf_data
,
bl_no
,
debug_mode
=
False
):
...
@@ -1371,7 +1390,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1371,7 +1390,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
all_recognized_texts
=
[]
all_recognized_texts
=
[]
result_data
=
False
result_data
=
False
total_pages
=
len
(
pdf_document
)
total_pages
=
len
(
pdf_document
)
# 处理每一页(完全按照HTML逻辑)
# 处理每一页(完全按照HTML逻辑)
for
page_num
in
range
(
total_pages
):
for
page_num
in
range
(
total_pages
):
page_start_time
=
time
.
time
()
page_start_time
=
time
.
time
()
...
@@ -1516,12 +1535,13 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1516,12 +1535,13 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 计算总处理时间
# 计算总处理时间
total_time
=
time
.
time
()
-
start_time
total_time
=
time
.
time
()
-
start_time
# 输出处理总结
# 输出处理总结
_logger
.
info
(
f
"OCR处理完成 - 提单号: {bl_no}, 处理页数: {processed_pages}, 删除矩形数: {total_rectangles}, 检测到文字数: {len(detected_texts)}"
)
_logger
.
info
(
f
"OCR处理完成 - 提单号: {bl_no}, 处理页数: {processed_pages}, 删除矩形数: {total_rectangles}, 检测到文字数: {len(detected_texts)}"
)
_logger
.
info
(
f
"OCR总处理时间: {total_time:.2f}秒"
)
_logger
.
info
(
f
"OCR总处理时间: {total_time:.2f}秒"
)
_logger
.
info
(
f
"PDF保存时间: {pdf_save_time:.2f}秒"
)
_logger
.
info
(
f
"PDF保存时间: {pdf_save_time:.2f}秒"
)
_logger
.
info
(
f
"平均每页OCR处理时间: {total_time
/
total_pages:.2f}秒"
)
_logger
.
info
(
f
"平均每页OCR处理时间: {total_time
/
total_pages:.2f}秒"
)
if
detected_texts
:
if
detected_texts
:
_logger
.
info
(
f
"检测到的目标文字: {[text['text'] for text in detected_texts]}"
)
_logger
.
info
(
f
"检测到的目标文字: {[text['text'] for text in detected_texts]}"
)
except
Exception
as
e
:
except
Exception
as
e
:
...
@@ -1609,7 +1629,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1609,7 +1629,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
Find target texts using OCR results (完全按照HTML逻辑) # 使用OCR结果查找目标文字
Find target texts using OCR results (完全按照HTML逻辑) # 使用OCR结果查找目标文字
"""
"""
# 定义目标文字和排除文字(与HTML文件完全一致)
# 定义目标文字和排除文字(与HTML文件完全一致)
TARGET_TEXTS
=
[
'AGN'
,
'ACN'
,
'UCLINK LOGISITICS LTD'
,
'UCLINK LOGISITICS'
,
'UCLINK'
,
'LOGISITICS'
,
'LOGISTICS'
,
'LTD'
,
TARGET_TEXTS
=
[
'AGN'
,
'ACN'
,
'UCLINK LOGISITICS LTD'
,
'UCLINK LOGISITICS'
,
'UCLINK'
,
'LOGISITICS'
,
'LOGISTICS'
,
'LTD'
,
'UCLINKLOGISITICSLTD'
]
'UCLINKLOGISITICSLTD'
]
EXCLUDE_TEXTS
=
[
'AIR EQK'
,
'ARN'
,
'EQK'
,
'AIR'
,
'Page 1 of 1'
,
'Page 2 of 2'
,
'Page 3 of 3'
,
'Page 4 of 4'
,
EXCLUDE_TEXTS
=
[
'AIR EQK'
,
'ARN'
,
'EQK'
,
'AIR'
,
'Page 1 of 1'
,
'Page 2 of 2'
,
'Page 3 of 3'
,
'Page 4 of 4'
,
'Page 5 of 5'
]
'Page 5 of 5'
]
...
@@ -1782,13 +1803,13 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1782,13 +1803,13 @@ class BatchGetPodInfoWizard(models.TransientModel):
if
not
pod_node
:
if
not
pod_node
:
_logger
.
info
(
f
"未找到尾程POD节点匹配的节点,提单号: {bl.bl_no}"
)
_logger
.
info
(
f
"未找到尾程POD节点匹配的节点,提单号: {bl.bl_no}"
)
continue
continue
# 只使用满足条件的小包(经过验证的valid_packages)
# 只使用满足条件的小包(经过验证的valid_packages)
valid_packages
=
file_info
.
get
(
'valid_packages'
,
[])
valid_packages
=
file_info
.
get
(
'valid_packages'
,
[])
if
not
valid_packages
:
if
not
valid_packages
:
_logger
.
warning
(
f
"提单 {bl.bl_no} 没有满足条件的小包,跳过节点推送"
)
_logger
.
warning
(
f
"提单 {bl.bl_no} 没有满足条件的小包,跳过节点推送"
)
continue
continue
# 从valid_packages中提取小包ID(记录集对象或列表)
# 从valid_packages中提取小包ID(记录集对象或列表)
if
hasattr
(
valid_packages
,
'ids'
):
if
hasattr
(
valid_packages
,
'ids'
):
# 如果是记录集对象,直接获取IDs
# 如果是记录集对象,直接获取IDs
...
@@ -1799,13 +1820,13 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1799,13 +1820,13 @@ class BatchGetPodInfoWizard(models.TransientModel):
else
:
else
:
_logger
.
warning
(
f
"提单 {bl.bl_no} valid_packages格式不正确: {type(valid_packages)}"
)
_logger
.
warning
(
f
"提单 {bl.bl_no} valid_packages格式不正确: {type(valid_packages)}"
)
valid_package_ids
=
[]
valid_package_ids
=
[]
_logger
.
info
(
f
"提单 {bl.bl_no} 满足条件的小包ID: {valid_package_ids} (共 {len(valid_package_ids)} 个)"
)
_logger
.
info
(
f
"提单 {bl.bl_no} 满足条件的小包ID: {valid_package_ids} (共 {len(valid_package_ids)} 个)"
)
if
not
valid_package_ids
:
if
not
valid_package_ids
:
_logger
.
warning
(
f
"提单 {bl.bl_no} 满足条件的小包ID为空,跳过节点推送"
)
_logger
.
warning
(
f
"提单 {bl.bl_no} 满足条件的小包ID为空,跳过节点推送"
)
continue
continue
# 从PDF文件提取红色框的时间
# 从PDF文件提取红色框的时间
file_data
=
file_info
.
get
(
'file_data'
)
file_data
=
file_info
.
get
(
'file_data'
)
if
not
file_data
:
if
not
file_data
:
...
@@ -1819,7 +1840,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1819,7 +1840,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
if
extracted_times
:
if
extracted_times
:
# 取最早的时间作为节点操作时间
# 取最早的时间作为节点操作时间
earliest_time
=
min
(
extracted_times
)
earliest_time
=
min
(
extracted_times
)
_logger
.
info
(
f
"提取到最早时间: {earliest_time},将作为节点操作时间,满足条件的小包数量: {len(valid_package_ids)},小包ID: {valid_package_ids}"
)
_logger
.
info
(
f
"提取到最早时间: {earliest_time},将作为节点操作时间,满足条件的小包数量: {len(valid_package_ids)},小包ID: {valid_package_ids}"
)
ship_packages
.
append
({
ship_packages
.
append
({
'bl_id'
:
bl
.
id
,
'bl_id'
:
bl
.
id
,
'id'
:
valid_package_ids
,
# 只包含满足条件的小包ID
'id'
:
valid_package_ids
,
# 只包含满足条件的小包ID
...
@@ -1833,11 +1855,11 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -1833,11 +1855,11 @@ class BatchGetPodInfoWizard(models.TransientModel):
error_bl
.
append
(
bl
)
error_bl
.
append
(
bl
)
if
error_bl
:
if
error_bl
:
_logger
.
warning
(
f
"提单 {', '.join([bl.bl_no for bl in error_bl])} 没有提取到时间信息"
)
_logger
.
warning
(
f
"提单 {', '.join([bl.bl_no for bl in error_bl])} 没有提取到时间信息"
)
if
not
self
.
_context
.
get
(
'is_skip_raise_error'
):
if
not
self
.
_context
.
get
(
'is_skip_raise_error'
):
raise
ValidationError
(
raise
ValidationError
(
_
(
'
%
s bill of loading cannot get node operation time,please manually upload push tk'
)
%
(
_
(
'
%
s bill of loading cannot get node operation time,please manually upload push tk'
)
%
(
', '
.
join
([
bl
.
bl_no
for
bl
in
error_bl
])))
# xx提单号没有获取到节点操作时间,请手动上传推送提单到TK
', '
.
join
([
bl
.
bl_no
for
bl
in
error_bl
])))
# xx提单号没有获取到节点操作时间,请手动上传推送提单到TK
return
ship_packages
,
pod_node
.
id
return
ship_packages
,
pod_node
.
id
def
_sync_match_node
(
self
,
ship_packages
,
pod_node_id
):
def
_sync_match_node
(
self
,
ship_packages
,
pod_node_id
):
...
@@ -2054,29 +2076,30 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2054,29 +2076,30 @@ class BatchGetPodInfoWizard(models.TransientModel):
from
PIL
import
Image
from
PIL
import
Image
import
re
import
re
import
gc
import
gc
# 定义目标文字(与_find_target_texts一致)
# 定义目标文字(与_find_target_texts一致)
TARGET_TEXTS
=
[
'AGN'
,
'ACN'
,
'UCLINK LOGISITICS LTD'
,
'UCLINK LOGISITICS'
,
'UCLINK'
,
'LOGISITICS'
,
'LOGISTICS'
,
'LTD'
,
TARGET_TEXTS
=
[
'AGN'
,
'ACN'
,
'UCLINK LOGISITICS LTD'
,
'UCLINK LOGISITICS'
,
'UCLINK'
,
'LOGISITICS'
,
'LOGISTICS'
,
'LTD'
,
'UCLINKLOGISITICSLTD'
]
'UCLINKLOGISITICSLTD'
]
EXCLUDE_TEXTS
=
[
'AIR EQK'
,
'ARN'
,
'EQK'
,
'AIR'
,
'Page 1 of 1'
,
'Page 2 of 2'
,
'Page 3 of 3'
,
'Page 4 of 4'
,
EXCLUDE_TEXTS
=
[
'AIR EQK'
,
'ARN'
,
'EQK'
,
'AIR'
,
'Page 1 of 1'
,
'Page 2 of 2'
,
'Page 3 of 3'
,
'Page 4 of 4'
,
'Page 5 of 5'
]
'Page 5 of 5'
]
pdf_document
=
None
pdf_document
=
None
try
:
try
:
# 设置Tesseract路径
# 设置Tesseract路径
self
.
_setup_tesseract_path
()
self
.
_setup_tesseract_path
()
# 打开PDF文档
# 打开PDF文档
pdf_document
=
fitz
.
open
(
stream
=
pdf_binary
,
filetype
=
"pdf"
)
pdf_document
=
fitz
.
open
(
stream
=
pdf_binary
,
filetype
=
"pdf"
)
found_texts
=
[]
found_texts
=
[]
# 尝试导入OpenCV,如果失败则使用PIL替代
# 尝试导入OpenCV,如果失败则使用PIL替代
try
:
try
:
import
cv2
import
cv2
cv2_available
=
True
cv2_available
=
True
except
ImportError
:
except
ImportError
:
cv2_available
=
False
cv2_available
=
False
# 遍历每一页
# 遍历每一页
for
page_num
in
range
(
len
(
pdf_document
)):
for
page_num
in
range
(
len
(
pdf_document
)):
page
=
pdf_document
[
page_num
]
page
=
pdf_document
[
page_num
]
...
@@ -2085,11 +2108,11 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2085,11 +2108,11 @@ class BatchGetPodInfoWizard(models.TransientModel):
img
=
None
img
=
None
nparr
=
None
nparr
=
None
img_data
=
None
img_data
=
None
try
:
try
:
# 首先尝试从PDF文本层提取(如果是文本型PDF)
# 首先尝试从PDF文本层提取(如果是文本型PDF)
page_text_pdf
=
page
.
get_text
()
.
upper
()
page_text_pdf
=
page
.
get_text
()
.
upper
()
# 将页面转换为图像进行OCR识别(降低分辨率以节省内存)
# 将页面转换为图像进行OCR识别(降低分辨率以节省内存)
# 使用 2.0 倍分辨率(约 144 DPI)而不是 3.0 倍(约 216 DPI)
# 使用 2.0 倍分辨率(约 144 DPI)而不是 3.0 倍(约 216 DPI)
mat
=
fitz
.
Matrix
(
2.0
,
2.0
)
mat
=
fitz
.
Matrix
(
2.0
,
2.0
)
...
@@ -2098,7 +2121,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2098,7 +2121,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
del
pix
# 立即释放pixmap
del
pix
# 立即释放pixmap
pix
=
None
pix
=
None
gc
.
collect
()
# 强制垃圾回收
gc
.
collect
()
# 强制垃圾回收
# 转换为PIL图像
# 转换为PIL图像
if
cv2_available
:
if
cv2_available
:
nparr
=
np
.
frombuffer
(
img_data
,
np
.
uint8
)
nparr
=
np
.
frombuffer
(
img_data
,
np
.
uint8
)
...
@@ -2113,12 +2136,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2113,12 +2136,12 @@ class BatchGetPodInfoWizard(models.TransientModel):
pil_img
=
Image
.
open
(
io
.
BytesIO
(
img_data
))
pil_img
=
Image
.
open
(
io
.
BytesIO
(
img_data
))
if
pil_img
.
mode
!=
'RGB'
:
if
pil_img
.
mode
!=
'RGB'
:
pil_img
=
pil_img
.
convert
(
'RGB'
)
pil_img
=
pil_img
.
convert
(
'RGB'
)
# 释放img_data
# 释放img_data
del
img_data
del
img_data
img_data
=
None
img_data
=
None
gc
.
collect
()
gc
.
collect
()
# OCR识别
# OCR识别
try
:
try
:
config
=
'--psm 6 --oem 1 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,- -c preserve_interword_spaces=1'
config
=
'--psm 6 --oem 1 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,- -c preserve_interword_spaces=1'
...
@@ -2126,15 +2149,15 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2126,15 +2149,15 @@ class BatchGetPodInfoWizard(models.TransientModel):
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
warning
(
f
"OCR识别失败,第{page_num + 1}页,使用PDF文本: {str(e)}"
)
_logger
.
warning
(
f
"OCR识别失败,第{page_num + 1}页,使用PDF文本: {str(e)}"
)
ocr_text
=
page_text_pdf
ocr_text
=
page_text_pdf
# 合并PDF文本和OCR文本进行检查
# 合并PDF文本和OCR文本进行检查
combined_text
=
(
page_text_pdf
+
' '
+
ocr_text
)
.
upper
()
combined_text
=
(
page_text_pdf
+
' '
+
ocr_text
)
.
upper
()
# 使用与_find_target_texts完全相同的逻辑:先进行OCR单词识别
# 使用与_find_target_texts完全相同的逻辑:先进行OCR单词识别
try
:
try
:
# 获取OCR识别的单词列表
# 获取OCR识别的单词列表
words
=
pytesseract
.
image_to_data
(
pil_img
,
output_type
=
pytesseract
.
Output
.
DICT
,
lang
=
'eng'
)
words
=
pytesseract
.
image_to_data
(
pil_img
,
output_type
=
pytesseract
.
Output
.
DICT
,
lang
=
'eng'
)
# 过滤出有效的单词
# 过滤出有效的单词
valid_words
=
[]
valid_words
=
[]
for
i
in
range
(
len
(
words
[
'text'
])):
for
i
in
range
(
len
(
words
[
'text'
])):
...
@@ -2150,27 +2173,27 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2150,27 +2173,27 @@ class BatchGetPodInfoWizard(models.TransientModel):
'y1'
:
words
[
'top'
][
i
]
+
words
[
'height'
][
i
]
'y1'
:
words
[
'top'
][
i
]
+
words
[
'height'
][
i
]
}
}
})
})
# 释放words字典以节省内存
# 释放words字典以节省内存
del
words
del
words
gc
.
collect
()
gc
.
collect
()
# 使用与_find_target_texts相同的匹配逻辑
# 使用与_find_target_texts相同的匹配逻辑
page_found_texts
=
self
.
_find_target_texts
(
valid_words
,
page_num
,
800
,
600
,
800
,
600
)
page_found_texts
=
self
.
_find_target_texts
(
valid_words
,
page_num
,
800
,
600
,
800
,
600
)
del
valid_words
# 释放valid_words列表
del
valid_words
# 释放valid_words列表
gc
.
collect
()
gc
.
collect
()
if
page_found_texts
:
if
page_found_texts
:
for
found_text
in
page_found_texts
:
for
found_text
in
page_found_texts
:
found_texts
.
append
(
f
"第{page_num + 1}页: {found_text['text']}"
)
found_texts
.
append
(
f
"第{page_num + 1}页: {found_text['text']}"
)
break
# 找到就跳出,避免重复
break
# 找到就跳出,避免重复
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
warning
(
f
"OCR单词识别失败,第{page_num + 1}页,使用文本匹配: {str(e)}"
)
_logger
.
warning
(
f
"OCR单词识别失败,第{page_num + 1}页,使用文本匹配: {str(e)}"
)
# 如果OCR单词识别失败,回退到文本匹配
# 如果OCR单词识别失败,回退到文本匹配
for
target_text
in
TARGET_TEXTS
:
for
target_text
in
TARGET_TEXTS
:
target_upper
=
target_text
.
upper
()
target_upper
=
target_text
.
upper
()
# 检查是否包含目标文字
# 检查是否包含目标文字
is_match
=
False
is_match
=
False
if
target_text
==
'AGN'
:
if
target_text
==
'AGN'
:
...
@@ -2191,7 +2214,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2191,7 +2214,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 排除AIR、EQK、ARN等(需要这些词都不存在)
# 排除AIR、EQK、ARN等(需要这些词都不存在)
if
'AIR EQK'
not
in
combined_text
and
'ARN'
not
in
combined_text
:
if
'AIR EQK'
not
in
combined_text
and
'ARN'
not
in
combined_text
:
is_match
=
True
is_match
=
True
# 如果匹配,检查是否在排除列表中
# 如果匹配,检查是否在排除列表中
if
is_match
:
if
is_match
:
is_excluded
=
False
is_excluded
=
False
...
@@ -2199,18 +2222,19 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2199,18 +2222,19 @@ class BatchGetPodInfoWizard(models.TransientModel):
exclude_upper
=
exclude_text
.
upper
()
exclude_upper
=
exclude_text
.
upper
()
if
exclude_upper
in
combined_text
and
target_upper
in
combined_text
:
if
exclude_upper
in
combined_text
and
target_upper
in
combined_text
:
# 检查是否是页码
# 检查是否是页码
if
re
.
search
(
r'PAGE\s+\d+\s+OF\s+\d+'
,
combined_text
)
or
re
.
search
(
r'\d+\s*/\s*\d+'
,
combined_text
):
if
re
.
search
(
r'PAGE\s+\d+\s+OF\s+\d+'
,
combined_text
)
or
re
.
search
(
r'\d+\s*/\s*\d+'
,
combined_text
):
is_excluded
=
True
is_excluded
=
True
break
break
# 检查是否是AIR EQK等排除项
# 检查是否是AIR EQK等排除项
if
'AIR EQK'
in
combined_text
or
'ARN'
in
combined_text
:
if
'AIR EQK'
in
combined_text
or
'ARN'
in
combined_text
:
is_excluded
=
True
is_excluded
=
True
break
break
if
not
is_excluded
:
if
not
is_excluded
:
found_texts
.
append
(
f
"第{page_num + 1}页: {target_text}"
)
found_texts
.
append
(
f
"第{page_num + 1}页: {target_text}"
)
break
# 找到就跳出,避免重复
break
# 找到就跳出,避免重复
# 释放PIL图像和文本变量
# 释放PIL图像和文本变量
if
pil_img
:
if
pil_img
:
pil_img
.
close
()
pil_img
.
close
()
...
@@ -2220,7 +2244,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2220,7 +2244,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
del
combined_text
del
combined_text
pil_img
=
None
pil_img
=
None
gc
.
collect
()
# 强制垃圾回收
gc
.
collect
()
# 强制垃圾回收
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"第{page_num + 1}页处理异常: {str(e)}"
)
_logger
.
error
(
f
"第{page_num + 1}页处理异常: {str(e)}"
)
# 确保资源被释放
# 确保资源被释放
...
@@ -2237,19 +2261,19 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2237,19 +2261,19 @@ class BatchGetPodInfoWizard(models.TransientModel):
del
img_data
del
img_data
gc
.
collect
()
gc
.
collect
()
continue
continue
if
pdf_document
:
if
pdf_document
:
pdf_document
.
close
()
pdf_document
.
close
()
pdf_document
=
None
pdf_document
=
None
gc
.
collect
()
gc
.
collect
()
if
found_texts
:
if
found_texts
:
_logger
.
warning
(
f
"提单 {bl_no} 仍存在目标文字: {', '.join(found_texts)}"
)
_logger
.
warning
(
f
"提单 {bl_no} 仍存在目标文字: {', '.join(found_texts)}"
)
return
True
,
found_texts
return
True
,
found_texts
else
:
else
:
_logger
.
info
(
f
"提单 {bl_no} 未发现目标文字"
)
_logger
.
info
(
f
"提单 {bl_no} 未发现目标文字"
)
return
False
,
[]
return
False
,
[]
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"检查目标文字失败,提单号: {bl_no}, 错误: {str(e)}"
)
_logger
.
error
(
f
"检查目标文字失败,提单号: {bl_no}, 错误: {str(e)}"
)
# 确保资源被释放
# 确保资源被释放
...
@@ -2262,8 +2286,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2262,8 +2286,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 检查失败时,假设不存在(避免误报)
# 检查失败时,假设不存在(避免误报)
return
False
,
[]
return
False
,
[]
def
_cleanup_temp_attachments
(
self
,
bl_objs
=
None
):
def
_cleanup_temp_attachments
(
self
,
bl_objs
=
None
):
"""
"""
清理与当前向导相关的临时附件,包括服务器和本地开发环境的物理文件
清理与当前向导相关的临时附件,包括服务器和本地开发环境的物理文件
"""
"""
...
@@ -2273,11 +2296,11 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2273,11 +2296,11 @@ class BatchGetPodInfoWizard(models.TransientModel):
(
'res_id'
,
'in'
,
bl_objs
.
ids
),
(
'res_id'
,
'in'
,
bl_objs
.
ids
),
(
'name'
,
'like'
,
'temp_pod_
%
'
)
(
'name'
,
'like'
,
'temp_pod_
%
'
)
])
])
if
attachments
:
if
attachments
:
# 删除数据库记录
# 删除数据库记录
attachments
.
unlink
()
attachments
.
unlink
()
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"清理临时附件失败: {str(e)}"
)
_logger
.
error
(
f
"清理临时附件失败: {str(e)}"
)
...
@@ -2289,7 +2312,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2289,7 +2312,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
"""
"""
# 注意:不在这里清理临时附件,因为预览时需要保留附件数据
# 注意:不在这里清理临时附件,因为预览时需要保留附件数据
# 只有在确认操作完成后才清理临时附件
# 只有在确认操作完成后才清理临时附件
serialized_data
=
[]
serialized_data
=
[]
for
file_info
in
processed_files
:
for
file_info
in
processed_files
:
if
not
file_info
.
get
(
'bl'
):
if
not
file_info
.
get
(
'bl'
):
...
@@ -2310,7 +2333,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2310,7 +2333,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
})
})
attachment_id
=
attachment
.
id
attachment_id
=
attachment
.
id
_logger
.
info
(
f
"已创建临时附件存储文件: {attachment.name}, ID: {attachment_id}"
)
_logger
.
info
(
f
"已创建临时附件存储文件: {attachment.name}, ID: {attachment_id}"
)
# 验证附件创建后数据是否正确
# 验证附件创建后数据是否正确
created_attachment
=
self
.
env
[
'ir.attachment'
]
.
browse
(
attachment_id
)
created_attachment
=
self
.
env
[
'ir.attachment'
]
.
browse
(
attachment_id
)
if
created_attachment
.
datas
:
if
created_attachment
.
datas
:
...
@@ -2318,21 +2341,22 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2318,21 +2341,22 @@ class BatchGetPodInfoWizard(models.TransientModel):
try
:
try
:
original_decoded
=
base64
.
b64decode
(
file_data
)
original_decoded
=
base64
.
b64decode
(
file_data
)
attachment_decoded
=
base64
.
b64decode
(
created_attachment
.
datas
)
attachment_decoded
=
base64
.
b64decode
(
created_attachment
.
datas
)
if
len
(
original_decoded
)
==
len
(
attachment_decoded
):
if
len
(
original_decoded
)
==
len
(
attachment_decoded
):
_logger
.
info
(
f
"附件数据验证成功,解码后长度: {len(original_decoded)}"
)
_logger
.
info
(
f
"附件数据验证成功,解码后长度: {len(original_decoded)}"
)
else
:
else
:
_logger
.
warning
(
f
"附件数据长度不匹配: 原始={len(original_decoded)}, 附件={len(attachment_decoded)}"
)
_logger
.
warning
(
f
"附件数据长度不匹配: 原始={len(original_decoded)}, 附件={len(attachment_decoded)}"
)
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
warning
(
f
"附件数据验证失败: {str(e)}"
)
_logger
.
warning
(
f
"附件数据验证失败: {str(e)}"
)
else
:
else
:
_logger
.
error
(
f
"附件数据为空"
)
_logger
.
error
(
f
"附件数据为空"
)
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"创建临时附件失败: {str(e)}"
)
_logger
.
error
(
f
"创建临时附件失败: {str(e)}"
)
else
:
else
:
_logger
.
warning
(
f
"提单 {bl.bl_no} 的文件数据为空,无法创建附件"
)
_logger
.
warning
(
f
"提单 {bl.bl_no} 的文件数据为空,无法创建附件"
)
data
=
{
data
=
{
'bl_id'
:
bl
.
id
,
'bl_id'
:
bl
.
id
,
'bl_no'
:
bl
.
bl_no
,
'bl_no'
:
bl
.
bl_no
,
...
@@ -2353,7 +2377,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2353,7 +2377,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
data
[
'valid_package_ids'
]
=
[
p
.
id
for
p
in
valid_packages
if
hasattr
(
p
,
'id'
)]
data
[
'valid_package_ids'
]
=
[
p
.
id
for
p
in
valid_packages
if
hasattr
(
p
,
'id'
)]
else
:
else
:
data
[
'valid_package_ids'
]
=
[]
data
[
'valid_package_ids'
]
=
[]
_logger
.
info
(
f
"序列化时保存valid_packages: 提单 {bl.bl_no}, 满足条件的小包ID: {data['valid_package_ids']}"
)
_logger
.
info
(
f
"序列化时保存valid_packages: 提单 {bl.bl_no}, 满足条件的小包ID: {data['valid_package_ids']}"
)
serialized_data
.
append
(
data
)
serialized_data
.
append
(
data
)
return
json
.
dumps
(
serialized_data
,
ensure_ascii
=
False
)
return
json
.
dumps
(
serialized_data
,
ensure_ascii
=
False
)
...
@@ -2382,8 +2407,9 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2382,8 +2407,9 @@ class BatchGetPodInfoWizard(models.TransientModel):
if
attachment
.
exists
():
if
attachment
.
exists
():
# attachment.datas 已经是 base64 编码的字符串
# attachment.datas 已经是 base64 编码的字符串
file_data
=
attachment
.
datas
file_data
=
attachment
.
datas
_logger
.
info
(
f
"从附件读取文件: {attachment.name}, ID: {attachment_id}, 数据长度: {len(file_data) if file_data else 0}"
)
_logger
.
info
(
f
"从附件读取文件: {attachment.name}, ID: {attachment_id}, 数据长度: {len(file_data) if file_data else 0}"
)
# 验证数据格式
# 验证数据格式
if
file_data
:
if
file_data
:
_logger
.
info
(
f
"附件数据格式: 前100个字符: {file_data[:100]}"
)
_logger
.
info
(
f
"附件数据格式: 前100个字符: {file_data[:100]}"
)
...
@@ -2403,7 +2429,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2403,7 +2429,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
_logger
.
error
(
f
"读取附件失败: {str(e)}"
)
_logger
.
error
(
f
"读取附件失败: {str(e)}"
)
else
:
else
:
_logger
.
warning
(
f
"提单 {bl.bl_no} 没有附件ID,无法读取文件数据"
)
_logger
.
warning
(
f
"提单 {bl.bl_no} 没有附件ID,无法读取文件数据"
)
file_info
=
{
file_info
=
{
'bl'
:
bl
,
'bl'
:
bl
,
'bl_no'
:
data
.
get
(
'bl_no'
,
''
),
'bl_no'
:
data
.
get
(
'bl_no'
,
''
),
...
@@ -2419,7 +2445,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2419,7 +2445,8 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 重建记录集对象
# 重建记录集对象
valid_packages
=
self
.
env
[
'cc.ship.package'
]
.
browse
(
valid_package_ids
)
valid_packages
=
self
.
env
[
'cc.ship.package'
]
.
browse
(
valid_package_ids
)
file_info
[
'valid_packages'
]
=
valid_packages
file_info
[
'valid_packages'
]
=
valid_packages
_logger
.
info
(
f
"反序列化时恢复valid_packages: 提单 {bl.bl_no}, 满足条件的小包ID: {valid_package_ids}, 数量: {len(valid_packages)}"
)
_logger
.
info
(
f
"反序列化时恢复valid_packages: 提单 {bl.bl_no}, 满足条件的小包ID: {valid_package_ids}, 数量: {len(valid_packages)}"
)
processed_files
.
append
(
file_info
)
processed_files
.
append
(
file_info
)
return
processed_files
return
processed_files
except
Exception
as
e
:
except
Exception
as
e
:
...
@@ -2435,7 +2462,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2435,7 +2462,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
try
:
try
:
# 计算1天前的时间(前一天23:59:59)
# 计算1天前的时间(前一天23:59:59)
today
=
datetime
.
now
()
.
replace
(
hour
=
0
,
minute
=
0
,
second
=
0
,
microsecond
=
0
)
today
=
datetime
.
now
()
.
replace
(
hour
=
0
,
minute
=
0
,
second
=
0
,
microsecond
=
0
)
one_day_ago
=
today
+
timedelta
(
days
=
2
)
-
timedelta
(
seconds
=
1
)
# 前一天23:59:59
one_day_ago
=
today
+
timedelta
(
days
=
2
)
-
timedelta
(
seconds
=
1
)
# 前一天23:59:59
_logger
.
info
(
f
"开始执行定时清理临时附件任务,清理时间点: {one_day_ago.strftime('
%
Y-
%
m-
%
d
%
H:
%
M:
%
S')}"
)
_logger
.
info
(
f
"开始执行定时清理临时附件任务,清理时间点: {one_day_ago.strftime('
%
Y-
%
m-
%
d
%
H:
%
M:
%
S')}"
)
# 构建SQL查询
# 构建SQL查询
sql_query
=
"""
sql_query
=
"""
...
@@ -2445,7 +2472,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2445,7 +2472,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
AND create_date < '
%
s'
AND create_date < '
%
s'
ORDER BY create_date DESC
ORDER BY create_date DESC
"""
%
(
one_day_ago
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
))
"""
%
(
one_day_ago
.
strftime
(
'
%
Y-
%
m-
%
d
%
H:
%
M:
%
S'
))
# 执行SQL查询
# 执行SQL查询
self
.
env
.
cr
.
execute
(
sql_query
)
self
.
env
.
cr
.
execute
(
sql_query
)
sql_results
=
self
.
env
.
cr
.
fetchall
()
sql_results
=
self
.
env
.
cr
.
fetchall
()
...
@@ -2456,9 +2483,9 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2456,9 +2483,9 @@ class BatchGetPodInfoWizard(models.TransientModel):
attachment_count
=
len
(
temp_attachments
)
attachment_count
=
len
(
temp_attachments
)
attachment_names
=
[
att
.
name
for
att
in
temp_attachments
]
attachment_names
=
[
att
.
name
for
att
in
temp_attachments
]
_logger
.
info
(
f
"找到 {attachment_count} 个{one_day_ago.strftime('
%
Y-
%
m-
%
d')}之前创建的临时附件,开始清理"
)
_logger
.
info
(
f
"找到 {attachment_count} 个{one_day_ago.strftime('
%
Y-
%
m-
%
d')}之前创建的临时附件,开始清理"
)
# 删除物理文件
# 删除物理文件
for
attachment
in
temp_attachments
:
for
attachment
in
temp_attachments
:
try
:
try
:
...
@@ -2472,24 +2499,23 @@ class BatchGetPodInfoWizard(models.TransientModel):
...
@@ -2472,24 +2499,23 @@ class BatchGetPodInfoWizard(models.TransientModel):
else
:
else
:
# 尝试从 name 字段构建路径
# 尝试从 name 字段构建路径
file_path
=
attachment
.
name
file_path
=
attachment
.
name
# 构建完整的文件路径
# 构建完整的文件路径
import
os
import
os
from
odoo.tools
import
config
from
odoo.tools
import
config
# 获取 Odoo 数据目录
# 获取 Odoo 数据目录
data_dir
=
config
.
filestore
(
self
.
env
.
cr
.
dbname
)
data_dir
=
config
.
filestore
(
self
.
env
.
cr
.
dbname
)
if
data_dir
and
file_path
:
if
data_dir
and
file_path
:
full_path
=
os
.
path
.
join
(
data_dir
,
file_path
)
full_path
=
os
.
path
.
join
(
data_dir
,
file_path
)
if
os
.
path
.
exists
(
full_path
):
if
os
.
path
.
exists
(
full_path
):
os
.
remove
(
full_path
)
os
.
remove
(
full_path
)
except
Exception
as
file_e
:
except
Exception
as
file_e
:
_logger
.
warning
(
f
"删除物理文件失败 {attachment.name}: {str(file_e)}"
)
_logger
.
warning
(
f
"删除物理文件失败 {attachment.name}: {str(file_e)}"
)
# 删除数据库记录
# 删除数据库记录
temp_attachments
.
unlink
()
temp_attachments
.
unlink
()
except
Exception
as
e
:
except
Exception
as
e
:
_logger
.
error
(
f
"定时清理临时附件失败: {str(e)}"
)
_logger
.
error
(
f
"定时清理临时附件失败: {str(e)}"
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论