Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
H
hh_ccs
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
贺阳
hh_ccs
Commits
46bba812
提交
46bba812
authored
11月 03, 2025
作者:
贺阳
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
优化分批处理,减少内存
上级
5ab84684
显示空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
353 行增加
和
19 行删除
+353
-19
batch_get_pod_info_wizard.py
ccs_base/wizard/batch_get_pod_info_wizard.py
+353
-19
没有找到文件。
ccs_base/wizard/batch_get_pod_info_wizard.py
浏览文件 @
46bba812
...
...
@@ -367,11 +367,16 @@ class BatchGetPodInfoWizard(models.TransientModel):
def
_merge_pdf_files
(
self
,
processed_files
):
"""
合并所有涂抹后的PDF文件为一个PDF并保存到pdf_file字段
使用临时文件方式减少内存占用
:param processed_files: 处理后的文件数组
"""
import
fitz
# PyMuPDF
from
datetime
import
datetime
import
tempfile
import
os
import
gc
temp_file_path
=
None
try
:
# 过滤有效的PDF文件
valid_files
=
[]
...
...
@@ -406,16 +411,24 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 多个PDF文件需要合并
_logger
.
info
(
f
"开始合并 {len(valid_files)} 个PDF文件"
)
# 创建新的PDF文档用于合并
# 使用临时文件方式合并,避免内存占用过大
temp_file_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
merged_pdf
=
fitz
.
open
()
bl_numbers
=
[]
# 遍历所有处理后的PDF文件
for
file_info
in
valid_files
:
# 遍历所有处理后的PDF文件,分批处理以减少内存占用
batch_size
=
5
# 每批处理5个PDF
for
batch_start
in
range
(
0
,
len
(
valid_files
),
batch_size
):
batch_files
=
valid_files
[
batch_start
:
batch_start
+
batch_size
]
_logger
.
info
(
f
"处理第 {batch_start // batch_size + 1} 批,共 {len(batch_files)} 个PDF"
)
for
file_info
in
batch_files
:
bl
=
file_info
[
'bl'
]
file_data
=
file_info
[
'file_data'
]
bl_numbers
.
append
(
bl
.
bl_no
)
source_pdf
=
None
pdf_binary
=
None
try
:
# 将base64数据转换为二进制
pdf_binary
=
base64
.
b64decode
(
file_data
)
...
...
@@ -426,21 +439,46 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 将源PDF的所有页面插入到合并的PDF中
merged_pdf
.
insert_pdf
(
source_pdf
)
source_pdf
.
close
(
)
_logger
.
info
(
f
"已添加提单 {bl.bl_no} 的PDF到合并文档"
)
_logger
.
info
(
f
"已添加提单 {bl.bl_no} 的PDF到合并文档({len(source_pdf)} 页)"
)
except
Exception
as
e
:
_logger
.
error
(
f
"合并提单 {bl.bl_no} 的PDF失败: {str(e)}"
)
continue
finally
:
# 立即释放资源
if
source_pdf
:
source_pdf
.
close
()
source_pdf
=
None
pdf_binary
=
None
gc
.
collect
()
# 强制垃圾回收
# 每批处理完后,保存到临时文件并释放内存
if
batch_start
+
batch_size
<
len
(
valid_files
):
# 保存当前合并结果到临时文件
merged_pdf
.
save
(
temp_file_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
merged_pdf
.
close
()
# 重新打开临时文件继续合并
merged_pdf
=
fitz
.
open
(
temp_file_path
)
gc
.
collect
()
# 如果有页面,保存合并后的PDF
if
len
(
merged_pdf
)
>
0
:
# 保存到内存
output_buffer
=
io
.
BytesIO
()
merged_pdf
.
save
(
output_buffer
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
# 使用临时文件保存,减少内存占用
if
not
temp_file_path
:
temp_file_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
merged_pdf
.
save
(
temp_file_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
merged_pdf
.
close
()
# 从临时文件读取并转换为base64
with
open
(
temp_file_path
,
'rb'
)
as
f
:
pdf_data
=
f
.
read
()
# 转换为base64
merged_pdf_base64
=
base64
.
b64encode
(
output_buffer
.
getvalue
())
.
decode
(
'utf-8'
)
merged_pdf_base64
=
base64
.
b64encode
(
pdf_data
)
.
decode
(
'utf-8'
)
# 清理临时数据
del
pdf_data
gc
.
collect
()
# 生成文件名(包含提单号和日期)
bl_numbers_str
=
'_'
.
join
(
bl_numbers
[:
5
])
# 最多显示5个提单号
...
...
@@ -455,12 +493,24 @@ class BatchGetPodInfoWizard(models.TransientModel):
'pdf_filename'
:
pdf_filename
})
# 清理base64数据
del
merged_pdf_base64
gc
.
collect
()
_logger
.
info
(
f
"成功合并 {len(bl_numbers)} 个PDF文件,文件名: {pdf_filename}"
)
else
:
_logger
.
warning
(
"没有有效的PDF文件可以合并"
)
except
Exception
as
e
:
_logger
.
error
(
f
"合并PDF文件失败: {str(e)}"
)
finally
:
# 清理临时文件
if
temp_file_path
and
os
.
path
.
exists
(
temp_file_path
):
try
:
os
.
remove
(
temp_file_path
)
_logger
.
info
(
f
"已删除临时文件: {temp_file_path}"
)
except
Exception
as
e
:
_logger
.
warning
(
f
"删除临时文件失败: {str(e)}"
)
def
_match_bl_by_file_name
(
self
,
pdf_file_arr
):
"""
...
...
@@ -836,6 +886,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
def
_process_pdf_with_ai_image_edit
(
self
,
pdf_data
,
bl_no
):
"""
使用AI图片编辑处理PDF:PDF转图片 -> AI抹除文字 -> 图片转回PDF(按照image-to-coordinate.py的逻辑)
优化内存占用:对于多页PDF使用临时文件方式分批处理
:param pdf_data: PDF二进制数据
:param bl_no: 提单号(用于日志)
:return: 处理后的PDF二进制数据
...
...
@@ -843,6 +894,9 @@ class BatchGetPodInfoWizard(models.TransientModel):
import
fitz
# PyMuPDF
import
base64
import
mimetypes
import
gc
import
os
import
tempfile
from
PIL
import
Image
import
time
...
...
@@ -854,12 +908,22 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 打开PDF文档
pdf_document
=
fitz
.
open
(
stream
=
pdf_data
,
filetype
=
"pdf"
)
processed_images
=
[]
# 存储处理后的PIL图片对象
total_pages
=
len
(
pdf_document
)
total_ai_time
=
0.0
# 累计AI总耗时
_logger
.
info
(
f
"PDF总页数: {total_pages}"
)
# 对于多页PDF,使用临时文件方式减少内存占用
use_temp_file
=
total_pages
>
5
# 超过5页使用临时文件
temp_file_path
=
None
if
use_temp_file
:
import
tempfile
temp_file_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
_logger
.
info
(
f
"使用临时文件方式处理,减少内存占用: {temp_file_path}"
)
processed_images
=
[]
# 存储处理后的PIL图片对象(分批处理)
batch_size
=
5
# 每批处理5页图片
# 遍历每一页(按照image-to-coordinate.py的逻辑)
for
page_num
in
range
(
total_pages
):
page_start_time
=
time
.
time
()
...
...
@@ -867,12 +931,22 @@ class BatchGetPodInfoWizard(models.TransientModel):
_logger
.
info
(
f
"正在处理第{page_num + 1}页"
)
# 将页面转换为图像(按照image-to-coordinate.py的pdf_to_images函数,使用dpi=150)
dpi
=
150
# 对于内存优化,使用稍低的分辨率(120 DPI)以避免内存问题
dpi
=
120
mat
=
fitz
.
Matrix
(
dpi
/
72
,
dpi
/
72
)
pix
=
None
img
=
None
img_bytes_io
=
None
try
:
pix
=
page
.
get_pixmap
(
matrix
=
mat
)
# 将pixmap转换为PIL Image对象
img_data
=
pix
.
tobytes
(
"png"
)
del
pix
# 立即释放pixmap以节省内存
pix
=
None
gc
.
collect
()
# 强制垃圾回收
img
=
Image
.
open
(
io
.
BytesIO
(
img_data
))
# 获取图片尺寸(按照image-to-coordinate.py的逻辑)
...
...
@@ -884,8 +958,10 @@ class BatchGetPodInfoWizard(models.TransientModel):
img
.
save
(
img_bytes_io
,
format
=
'PNG'
)
img_bytes_io
.
seek
(
0
)
encoded_string
=
base64
.
b64encode
(
img_bytes_io
.
read
())
.
decode
(
'utf-8'
)
mime_type
=
'image/png'
img_base64
=
f
"data:{mime_type};base64,{encoded_string}"
img_bytes_io
.
close
()
# 立即关闭BytesIO
img_bytes_io
=
None
del
img_data
# 释放图片数据
gc
.
collect
()
# 强制垃圾回收
# 使用AI编辑图片,移除指定文字(带重试机制)
edited_img_base64
=
None
...
...
@@ -924,10 +1000,16 @@ class BatchGetPodInfoWizard(models.TransientModel):
_logger
.
info
(
f
"第{page_num + 1}页将进行第{attempt + 1}次重试"
)
edited_img_base64
=
None
# 释放encoded_string以节省内存
del
encoded_string
gc
.
collect
()
if
edited_img_base64
:
# 解码base64图片数据并转换为PIL Image对象(按照image-to-coordinate.py的逻辑)
edited_img_data
=
base64
.
b64decode
(
edited_img_base64
)
edited_img
=
Image
.
open
(
io
.
BytesIO
(
edited_img_data
))
.
convert
(
'RGB'
)
del
edited_img_data
# 立即释放原始数据
del
edited_img_base64
# 释放base64字符串
processed_images
.
append
(
edited_img
)
_logger
.
info
(
f
"第{page_num + 1}页AI处理最终成功,总耗时: {ai_processing_time:.2f}秒"
)
else
:
...
...
@@ -935,6 +1017,86 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 如果AI处理失败,使用原始图片
processed_images
.
append
(
img
.
convert
(
'RGB'
))
# 释放原始图片对象
if
img
:
img
.
close
()
del
img
img
=
None
gc
.
collect
()
# 强制垃圾回收
# 分批处理:每处理batch_size页,就转换为PDF并保存到临时文件
if
use_temp_file
and
len
(
processed_images
)
>=
batch_size
:
_logger
.
info
(
f
"达到批次大小 {batch_size},开始保存到临时文件"
)
try
:
# 将已处理的图片转换为PDF
batch_buffer
=
io
.
BytesIO
()
first_batch
=
processed_images
[
0
]
rest_batch
=
processed_images
[
1
:]
first_batch
.
save
(
batch_buffer
,
format
=
'PDF'
,
save_all
=
True
,
append_images
=
rest_batch
)
batch_buffer
.
seek
(
0
)
pdf_bytes
=
batch_buffer
.
getvalue
()
batch_buffer
.
close
()
# 释放已处理的图片
for
img_obj
in
processed_images
:
if
img_obj
:
img_obj
.
close
()
processed_images
=
[]
gc
.
collect
()
if
os
.
path
.
exists
(
temp_file_path
)
and
os
.
path
.
getsize
(
temp_file_path
)
>
0
:
# 追加到现有PDF:先读取现有内容,合并后保存到新文件,再替换
with
open
(
temp_file_path
,
'rb'
)
as
f
:
existing_bytes
=
f
.
read
()
existing_pdf
=
fitz
.
open
(
stream
=
existing_bytes
,
filetype
=
"pdf"
)
new_pdf
=
fitz
.
open
(
stream
=
pdf_bytes
,
filetype
=
"pdf"
)
existing_pdf
.
insert_pdf
(
new_pdf
)
new_pdf
.
close
()
# 保存到新临时文件,避免"save to original must be incremental"错误
new_temp_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
existing_pdf
.
save
(
new_temp_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
existing_pdf
.
close
()
# 替换旧文件
os
.
remove
(
temp_file_path
)
os
.
rename
(
new_temp_path
,
temp_file_path
)
# 释放资源
del
existing_bytes
del
pdf_bytes
gc
.
collect
()
else
:
# 创建新的PDF
with
open
(
temp_file_path
,
'wb'
)
as
f
:
f
.
write
(
pdf_bytes
)
del
pdf_bytes
gc
.
collect
()
except
Exception
as
e
:
_logger
.
error
(
f
"分批保存PDF失败: {str(e)}"
)
# 失败时继续处理,最后统一处理
# 但需要释放已处理的图片,避免内存占用
for
img_obj
in
processed_images
:
if
img_obj
:
img_obj
.
close
()
processed_images
=
[]
gc
.
collect
()
except
Exception
as
e
:
_logger
.
error
(
f
"第{page_num + 1}页处理异常: {str(e)}"
)
# 确保资源被释放
if
pix
:
del
pix
if
img
:
img
.
close
()
del
img
if
img_bytes_io
:
img_bytes_io
.
close
()
gc
.
collect
()
# 如果处理失败,跳过这一页或使用原始页面
continue
page_end_time
=
time
.
time
()
page_processing_time
=
page_end_time
-
page_start_time
_logger
.
info
(
f
"第{page_num + 1}页总处理时间: {page_processing_time:.2f}秒"
)
...
...
@@ -943,11 +1105,85 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 将处理后的图片转换为PDF(按照image-to-coordinate.py的images_to_pdf函数逻辑)
pdf_creation_start
=
time
.
time
()
if
not
processed_images
:
_logger
.
error
(
"没有需要写入PDF的图片"
)
result_data
=
None
import
os
try
:
if
use_temp_file
and
temp_file_path
:
# 如果还有剩余的图片,追加到临时文件
if
processed_images
:
_logger
.
info
(
f
"处理剩余的 {len(processed_images)} 页图片"
)
try
:
# 将剩余图片转换为PDF
batch_buffer
=
io
.
BytesIO
()
first_batch
=
processed_images
[
0
]
rest_batch
=
processed_images
[
1
:]
first_batch
.
save
(
batch_buffer
,
format
=
'PDF'
,
save_all
=
True
,
append_images
=
rest_batch
)
batch_buffer
.
seek
(
0
)
temp_pdf_bytes
=
batch_buffer
.
getvalue
()
batch_buffer
.
close
()
# 释放图片
for
img_obj
in
processed_images
:
if
img_obj
:
img_obj
.
close
()
processed_images
=
None
gc
.
collect
()
# 追加到临时文件
if
os
.
path
.
exists
(
temp_file_path
)
and
os
.
path
.
getsize
(
temp_file_path
)
>
0
:
# 如果临时文件已存在,先读取内容
with
open
(
temp_file_path
,
'rb'
)
as
f
:
existing_pdf_bytes
=
f
.
read
()
# 合并PDF:打开现有PDF和新PDF,然后合并
existing_pdf
=
fitz
.
open
(
stream
=
existing_pdf_bytes
,
filetype
=
"pdf"
)
new_pdf
=
fitz
.
open
(
stream
=
temp_pdf_bytes
,
filetype
=
"pdf"
)
existing_pdf
.
insert_pdf
(
new_pdf
)
new_pdf
.
close
()
# 保存到新的临时文件,避免"save to original must be incremental"错误
new_temp_path
=
tempfile
.
mktemp
(
suffix
=
'.pdf'
)
existing_pdf
.
save
(
new_temp_path
,
garbage
=
4
,
deflate
=
True
,
clean
=
True
)
existing_pdf
.
close
()
# 删除旧临时文件,重命名新文件
os
.
remove
(
temp_file_path
)
os
.
rename
(
new_temp_path
,
temp_file_path
)
# 释放资源
del
existing_pdf_bytes
del
temp_pdf_bytes
gc
.
collect
()
else
:
# 如果临时文件不存在或为空,直接写入
with
open
(
temp_file_path
,
'wb'
)
as
f
:
f
.
write
(
temp_pdf_bytes
)
del
temp_pdf_bytes
gc
.
collect
()
except
Exception
as
e
:
_logger
.
error
(
f
"追加剩余图片失败: {str(e)}"
)
# 注意:processed_images 在这里已经被释放了,需要重新获取
# 如果还有剩余图片,需要重新处理(这种情况不应该发生,因为前面已经释放了)
_logger
.
warning
(
"追加剩余图片失败,剩余图片已在之前释放"
)
# 从临时文件读取最终结果
if
os
.
path
.
exists
(
temp_file_path
):
with
open
(
temp_file_path
,
'rb'
)
as
f
:
result_data
=
f
.
read
()
# 删除临时文件
try
:
os
.
remove
(
temp_file_path
)
_logger
.
info
(
f
"已删除临时文件: {temp_file_path}"
)
except
Exception
as
e
:
_logger
.
warning
(
f
"删除临时文件失败: {str(e)}"
)
else
:
_logger
.
error
(
"临时文件不存在,无法读取结果"
)
return
None
# 使用PIL的save方法将图片保存为PDF(按照image-to-coordinate.py的逻辑)
elif
processed_images
:
# 使用内存方式处理(5页以内)
output_buffer
=
io
.
BytesIO
()
first
=
processed_images
[
0
]
rest
=
processed_images
[
1
:]
# 按照image-to-coordinate.py的逻辑,直接使用切片
...
...
@@ -955,9 +1191,40 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 即使rest是空列表,也直接传入(PIL会正确处理)
first
.
save
(
output_buffer
,
format
=
'PDF'
,
save_all
=
True
,
append_images
=
rest
)
output_buffer
.
seek
(
0
)
pdf_creation_end
=
time
.
time
()
result_data
=
output_buffer
.
getvalue
()
output_buffer
.
close
()
# 释放所有图片对象
for
img_obj
in
processed_images
:
if
img_obj
:
img_obj
.
close
()
processed_images
=
None
del
first
del
rest
else
:
_logger
.
error
(
"没有需要写入PDF的图片"
)
return
None
gc
.
collect
()
# 强制垃圾回收
except
Exception
as
e
:
_logger
.
error
(
f
"PDF创建失败: {str(e)}"
)
# 确保资源被释放
if
processed_images
:
for
img_obj
in
processed_images
:
if
img_obj
:
img_obj
.
close
()
if
temp_file_path
and
os
.
path
.
exists
(
temp_file_path
):
try
:
os
.
remove
(
temp_file_path
)
except
:
pass
gc
.
collect
()
return
None
pdf_creation_end
=
time
.
time
()
total_time
=
time
.
time
()
-
start_time
pdf_creation_time
=
pdf_creation_end
-
pdf_creation_start
...
...
@@ -1664,6 +1931,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
import
numpy
as
np
from
PIL
import
Image
import
re
import
gc
# 定义目标文字(与_find_target_texts一致)
TARGET_TEXTS
=
[
'AGN'
,
'ACN'
,
'UCLINK LOGISITICS LTD'
,
'UCLINK LOGISITICS'
,
'UCLINK'
,
'LOGISITICS'
,
'LOGISTICS'
,
'LTD'
,
...
...
@@ -1671,6 +1939,7 @@ class BatchGetPodInfoWizard(models.TransientModel):
EXCLUDE_TEXTS
=
[
'AIR EQK'
,
'ARN'
,
'EQK'
,
'AIR'
,
'Page 1 of 1'
,
'Page 2 of 2'
,
'Page 3 of 3'
,
'Page 4 of 4'
,
'Page 5 of 5'
]
pdf_document
=
None
try
:
# 设置Tesseract路径
self
.
_setup_tesseract_path
()
...
...
@@ -1689,25 +1958,45 @@ class BatchGetPodInfoWizard(models.TransientModel):
# 遍历每一页
for
page_num
in
range
(
len
(
pdf_document
)):
page
=
pdf_document
[
page_num
]
pix
=
None
pil_img
=
None
img
=
None
nparr
=
None
img_data
=
None
try
:
# 首先尝试从PDF文本层提取(如果是文本型PDF)
page_text_pdf
=
page
.
get_text
()
.
upper
()
# 将页面转换为图像进行OCR识别
mat
=
fitz
.
Matrix
(
3.0
,
3.0
)
# 进一步提高分辨率,从2.0提升到3.0
# 将页面转换为图像进行OCR识别(降低分辨率以节省内存)
# 使用 2.0 倍分辨率(约 144 DPI)而不是 3.0 倍(约 216 DPI)
mat
=
fitz
.
Matrix
(
2.0
,
2.0
)
pix
=
page
.
get_pixmap
(
matrix
=
mat
)
img_data
=
pix
.
tobytes
(
"png"
)
del
pix
# 立即释放pixmap
pix
=
None
gc
.
collect
()
# 强制垃圾回收
# 转换为PIL图像
if
cv2_available
:
nparr
=
np
.
frombuffer
(
img_data
,
np
.
uint8
)
img
=
cv2
.
imdecode
(
nparr
,
cv2
.
IMREAD_COLOR
)
pil_img
=
Image
.
fromarray
(
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_BGR2RGB
))
del
nparr
# 释放numpy数组
del
img
# 释放OpenCV图像
nparr
=
None
img
=
None
gc
.
collect
()
else
:
pil_img
=
Image
.
open
(
io
.
BytesIO
(
img_data
))
if
pil_img
.
mode
!=
'RGB'
:
pil_img
=
pil_img
.
convert
(
'RGB'
)
# 释放img_data
del
img_data
img_data
=
None
gc
.
collect
()
# OCR识别
try
:
config
=
'--psm 6 --oem 1 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,- -c preserve_interword_spaces=1'
...
...
@@ -1740,11 +2029,19 @@ class BatchGetPodInfoWizard(models.TransientModel):
}
})
# 释放words字典以节省内存
del
words
gc
.
collect
()
# 使用与_find_target_texts相同的匹配逻辑
page_found_texts
=
self
.
_find_target_texts
(
valid_words
,
page_num
,
800
,
600
,
800
,
600
)
del
valid_words
# 释放valid_words列表
gc
.
collect
()
if
page_found_texts
:
for
found_text
in
page_found_texts
:
found_texts
.
append
(
f
"第{page_num + 1}页: {found_text['text']}"
)
break
# 找到就跳出,避免重复
except
Exception
as
e
:
_logger
.
warning
(
f
"OCR单词识别失败,第{page_num + 1}页,使用文本匹配: {str(e)}"
)
...
...
@@ -1792,7 +2089,37 @@ class BatchGetPodInfoWizard(models.TransientModel):
found_texts
.
append
(
f
"第{page_num + 1}页: {target_text}"
)
break
# 找到就跳出,避免重复
# 释放PIL图像和文本变量
if
pil_img
:
pil_img
.
close
()
del
pil_img
del
page_text_pdf
del
ocr_text
del
combined_text
pil_img
=
None
gc
.
collect
()
# 强制垃圾回收
except
Exception
as
e
:
_logger
.
error
(
f
"第{page_num + 1}页处理异常: {str(e)}"
)
# 确保资源被释放
if
pix
:
del
pix
if
pil_img
:
pil_img
.
close
()
del
pil_img
if
img
is
not
None
:
del
img
if
nparr
is
not
None
:
del
nparr
if
img_data
is
not
None
:
del
img_data
gc
.
collect
()
continue
if
pdf_document
:
pdf_document
.
close
()
pdf_document
=
None
gc
.
collect
()
if
found_texts
:
_logger
.
warning
(
f
"提单 {bl_no} 仍存在目标文字: {', '.join(found_texts)}"
)
...
...
@@ -1803,6 +2130,13 @@ class BatchGetPodInfoWizard(models.TransientModel):
except
Exception
as
e
:
_logger
.
error
(
f
"检查目标文字失败,提单号: {bl_no}, 错误: {str(e)}"
)
# 确保资源被释放
if
pdf_document
:
try
:
pdf_document
.
close
()
except
:
pass
gc
.
collect
()
# 检查失败时,假设不存在(避免误报)
return
False
,
[]
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论