提交 4777943a authored 作者: 伍姿英's avatar 伍姿英

Merge branch 'release/3.5.0'

......@@ -25,6 +25,7 @@
'wizard/add_exception_info_wizard_views.xml',
'wizard/email_template.xml',
'wizard/bl_done_wizard_views.xml',
'wizard/batch_get_pod_info_wizard_views.xml',
'data/data.xml',
'data/timer.xml',
'data/sequence.xml',
......
......@@ -40,5 +40,14 @@
<field name="value">mjgUUgbxXK8UHcRi5MTlPrb4BWM8NrOu</field>
</record>
<record id="last_mile_pod_api_url" model="ir.config_parameter">
<field name="key">last_mile_pod_api_url</field>
<field name="value">http://172.104.52.150:7002</field>
</record>
<record id="time_warning_hours" model="ir.config_parameter">
<field name="key">time_warning_hours</field>
<field name="value">24</field>
</record>
</data>
</odoo>
\ No newline at end of file
......@@ -6,11 +6,11 @@ msgid ""
msgstr ""
"Project-Id-Version: Odoo Server 16.0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-10-11 07:55+0000\n"
"PO-Revision-Date: 2025-10-11 15:59+0800\n"
"POT-Creation-Date: 2025-10-17 07:22+0000\n"
"PO-Revision-Date: 2025-10-17 15:27+0800\n"
"Last-Translator: \n"
"Language-Team: \n"
"Language: zh\n"
"Language: zh_CN\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
......@@ -58,6 +58,13 @@ msgid ""
"is: %s, not sent %s email"
msgstr "%s %s 操作了异常信息,异常原因:%s,未发送%s邮件"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "%s bill of loading cannot find release note file"
msgstr "%s 提单无法找到release note文件"
#. module: ccs_base
#: model:mail.template,body_html:ccs_base.email_template_exception_notification_en
msgid ""
......@@ -202,6 +209,28 @@ msgstr ""
"系统无法识别。\n"
" </span>"
#. module: ccs_base
#: model_terms:ir.ui.view,arch_db:ccs_base.view_batch_get_pod_info_wizard_form
msgid "<strong>Description:</strong>"
msgstr "<strong>说明:</strong>"
#. module: ccs_base
#: model_terms:ir.ui.view,arch_db:ccs_base.view_batch_get_pod_info_wizard_form
msgid ""
"<strong>Remove Specified Text:</strong> Remove specified text (AGN, UCLINK "
"LOGISITICS LTD) from PDF files"
msgstr ""
"<strong>涂抹指定文字:</strong>从 PDF 文件中删除指定文本(AGN、UCLINK "
"LOGISITICS LTD)。"
#. module: ccs_base
#: model_terms:ir.ui.view,arch_db:ccs_base.view_batch_get_pod_info_wizard_form
msgid ""
"<strong>Sync Last Mile POD:</strong> Synchronize POD (Proof of Delivery) "
"attachment information with TK system, including big package quantities and "
"container numbers"
msgstr "<strong>同步尾程POD:</strong>向TK同步尾程交接POD(带大包数量和箱号)的附件信息"
#. module: ccs_base
#: model:ir.model.fields,help:ccs_base.field_batch_input_ship_package_status_wizard__exclude_tracking_no
#: model:ir.model.fields,help:ccs_base.field_batch_input_ship_package_status_wizard__select_tracking_no
......@@ -228,6 +257,34 @@ msgstr "地址2"
msgid "ADD 3"
msgstr "地址3"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "API URL not configured"
msgstr "未配置 API URL"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "API request failed: %s"
msgstr "API 请求失败:%s"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "API returned empty response"
msgstr "API 返回空响应"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "API returned error: %s"
msgstr "API 返回错误:%s"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_cc_last_mile_provider__abbreviation
msgid "Abbreviation"
......@@ -435,6 +492,13 @@ msgstr "添加包裹异常信息"
msgid "Add Package Exception Information Wizard"
msgstr "添加包裹异常信息向导"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "All API PDF files failed validation"
msgstr "所有API PDF文件验证都失败"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_cc_history_package_sync_log__api_customer
msgid "Api Customer"
......@@ -541,6 +605,21 @@ msgstr "批量添加异常信息"
msgid "Batch Complete"
msgstr "批量完成"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/models/cc_bill_loading.py:0
#: model:ir.actions.act_window,name:ccs_base.action_batch_get_pod_info_wizard
#: model:ir.actions.server,name:ccs_base.bl_get_pod_info_server_action
#: model_terms:ir.ui.view,arch_db:ccs_base.view_batch_get_pod_info_wizard_form
#, python-format
msgid "Batch Get POD Info"
msgstr "批量获取尾程POD"
#. module: ccs_base
#: model:ir.model,name:ccs_base.model_batch_get_pod_info_wizard
msgid "Batch Get POD Info Wizard"
msgstr "批量获取尾程POD向导"
#. module: ccs_base
#: model_terms:ir.ui.view,arch_db:ccs_base.view_batch_update_transfer_bl_no_wizard
msgid "Batch Link Transfer B/L No"
......@@ -945,6 +1024,7 @@ msgstr "清关文件"
#. module: ccs_base
#: model_terms:ir.ui.view,arch_db:ccs_base.view_add_exception_info_wizard
#: model_terms:ir.ui.view,arch_db:ccs_base.view_associate_pallet_wizard
#: model_terms:ir.ui.view,arch_db:ccs_base.view_batch_get_pod_info_wizard_form
#: model_terms:ir.ui.view,arch_db:ccs_base.view_batch_input_ship_package_wizard
#: model_terms:ir.ui.view,arch_db:ccs_base.view_batch_update_transfer_bl_no_wizard
#: model_terms:ir.ui.view,arch_db:ccs_base.view_bl_done_wizard
......@@ -983,6 +1063,7 @@ msgstr "配置设置"
#. module: ccs_base
#: model_terms:ir.ui.view,arch_db:ccs_base.view_add_exception_info_wizard
#: model_terms:ir.ui.view,arch_db:ccs_base.view_associate_pallet_wizard
#: model_terms:ir.ui.view,arch_db:ccs_base.view_batch_get_pod_info_wizard_form
#: model_terms:ir.ui.view,arch_db:ccs_base.view_bl_done_wizard
msgid "Confirm"
msgstr "确认"
......@@ -1030,6 +1111,7 @@ msgstr "快递名称"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_add_exception_info_wizard__create_uid
#: model:ir.model.fields,field_description:ccs_base.field_associate_pallet_wizard__create_uid
#: model:ir.model.fields,field_description:ccs_base.field_batch_get_pod_info_wizard__create_uid
#: model:ir.model.fields,field_description:ccs_base.field_batch_input_ship_package_status_wizard__create_uid
#: model:ir.model.fields,field_description:ccs_base.field_batch_update_transfer_bl_no_wizard__create_uid
#: model:ir.model.fields,field_description:ccs_base.field_bl_done_wizard__create_uid
......@@ -1060,6 +1142,7 @@ msgstr "创建人"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_add_exception_info_wizard__create_date
#: model:ir.model.fields,field_description:ccs_base.field_associate_pallet_wizard__create_date
#: model:ir.model.fields,field_description:ccs_base.field_batch_get_pod_info_wizard__create_date
#: model:ir.model.fields,field_description:ccs_base.field_batch_input_ship_package_status_wizard__create_date
#: model:ir.model.fields,field_description:ccs_base.field_batch_update_transfer_bl_no_wizard__create_date
#: model:ir.model.fields,field_description:ccs_base.field_bl_done_wizard__create_date
......@@ -1250,6 +1333,7 @@ msgstr "消费者地址"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_add_exception_info_wizard__display_name
#: model:ir.model.fields,field_description:ccs_base.field_associate_pallet_wizard__display_name
#: model:ir.model.fields,field_description:ccs_base.field_batch_get_pod_info_wizard__display_name
#: model:ir.model.fields,field_description:ccs_base.field_batch_input_ship_package_status_wizard__display_name
#: model:ir.model.fields,field_description:ccs_base.field_batch_update_transfer_bl_no_wizard__display_name
#: model:ir.model.fields,field_description:ccs_base.field_bl_done_wizard__display_name
......@@ -1449,6 +1533,20 @@ msgstr "导出报关数据"
msgid "Export customs declaration documents"
msgstr "导出报关文件"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "Failed to get PDF file from API: %s"
msgstr "从 API 获取 PDF 文件失败: %s"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "Failed to save PDF attachment: %s"
msgstr "保存 PDF 附件失败:%s"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_cc_clearance_file__file
#: model_terms:ir.ui.view,arch_db:ccs_base.form_cc_clearance_file_view
......@@ -1660,6 +1758,7 @@ msgstr "历史小包"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_add_exception_info_wizard__id
#: model:ir.model.fields,field_description:ccs_base.field_associate_pallet_wizard__id
#: model:ir.model.fields,field_description:ccs_base.field_batch_get_pod_info_wizard__id
#: model:ir.model.fields,field_description:ccs_base.field_batch_input_ship_package_status_wizard__id
#: model:ir.model.fields,field_description:ccs_base.field_batch_update_transfer_bl_no_wizard__id
#: model:ir.model.fields,field_description:ccs_base.field_bl_done_wizard__id
......@@ -1780,6 +1879,20 @@ msgstr "个人"
msgid "Internal Account Number"
msgstr "内部帐号"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "Invalid PDF data for saving: cannot open PDF - %s"
msgstr "用于保存的 PDF 数据无效:无法打开 PDF - %s"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "Invalid PDF data for saving: not a valid PDF format"
msgstr "用于保存的 PDF 数据无效:不是有效的 PDF 格式"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_cc_history_ship_package__invoice_attachment_ids
#: model:ir.model.fields,field_description:ccs_base.field_cc_ship_package__invoice_attachment_ids
......@@ -1978,13 +2091,17 @@ msgstr "尾程服务商"
#. module: ccs_base
#: model:ir.actions.act_window,name:ccs_base.action_last_mile_provider
#: model:ir.model.fields,field_description:ccs_base.field_cc_bl__last_mile_provider_ids
#: model:ir.ui.menu,name:ccs_base.menu_last_mile_provider
#: model_terms:ir.ui.view,arch_db:ccs_base.form_cc_bl_view
#: model_terms:ir.ui.view,arch_db:ccs_base.tree_cc_bl_view
msgid "Last Mile Providers"
msgstr "尾程服务商"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_add_exception_info_wizard____last_update
#: model:ir.model.fields,field_description:ccs_base.field_associate_pallet_wizard____last_update
#: model:ir.model.fields,field_description:ccs_base.field_batch_get_pod_info_wizard____last_update
#: model:ir.model.fields,field_description:ccs_base.field_batch_input_ship_package_status_wizard____last_update
#: model:ir.model.fields,field_description:ccs_base.field_batch_update_transfer_bl_no_wizard____last_update
#: model:ir.model.fields,field_description:ccs_base.field_bl_done_wizard____last_update
......@@ -2021,6 +2138,7 @@ msgstr "最近操作时间"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_add_exception_info_wizard__write_uid
#: model:ir.model.fields,field_description:ccs_base.field_associate_pallet_wizard__write_uid
#: model:ir.model.fields,field_description:ccs_base.field_batch_get_pod_info_wizard__write_uid
#: model:ir.model.fields,field_description:ccs_base.field_batch_input_ship_package_status_wizard__write_uid
#: model:ir.model.fields,field_description:ccs_base.field_batch_update_transfer_bl_no_wizard__write_uid
#: model:ir.model.fields,field_description:ccs_base.field_bl_done_wizard__write_uid
......@@ -2051,6 +2169,7 @@ msgstr "最后更新人"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_add_exception_info_wizard__write_date
#: model:ir.model.fields,field_description:ccs_base.field_associate_pallet_wizard__write_date
#: model:ir.model.fields,field_description:ccs_base.field_batch_get_pod_info_wizard__write_date
#: model:ir.model.fields,field_description:ccs_base.field_batch_input_ship_package_status_wizard__write_date
#: model:ir.model.fields,field_description:ccs_base.field_batch_update_transfer_bl_no_wizard__write_date
#: model:ir.model.fields,field_description:ccs_base.field_bl_done_wizard__write_date
......@@ -2314,6 +2433,20 @@ msgstr "下一阶段服务商名称"
msgid "No Bill of Loading"
msgstr "提单号"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "No PDF files found"
msgstr "无法获取到pdf文件"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "No PDF files found in API response"
msgstr "API调用成功,但没有PDF文件"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_input_ship_package_statu_wizard.py:0
......@@ -2321,6 +2454,13 @@ msgstr "提单号"
msgid "No package to update found."
msgstr "未找到要更新的小包。"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_get_pod_info_wizard.py:0
#, python-format
msgid "No processed file data available"
msgstr "没有处理后的文件数据"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_cc_node_exception_reason__code_id
#: model_terms:ir.ui.view,arch_db:ccs_base.search_cc_node_exception_reason_view
......@@ -2903,6 +3043,11 @@ msgstr "收件人类型"
msgid "Receiver VAT"
msgstr "收件人税号"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_batch_get_pod_info_wizard__remove_specified_text
msgid "Remove Specified Text"
msgstr "涂抹指定文字"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_cc_big_package__activity_user_id
#: model:ir.model.fields,field_description:ccs_base.field_cc_bl__activity_user_id
......@@ -3270,6 +3415,11 @@ msgstr ""
msgid "Submit"
msgstr "提交"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_batch_get_pod_info_wizard__sync_last_mile_pod
msgid "Sync Last Mile POD"
msgstr "同步尾程POD"
#. module: ccs_base
#: model_terms:ir.ui.view,arch_db:ccs_base.form_cc_history_ship_package_view
msgid "Sync Log"
......@@ -3414,13 +3564,6 @@ msgstr "物流订单号必须唯一。"
msgid "The Transfer B/L No. must be unique."
msgstr "转单号必须唯一。"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/models/cc_bill_loading.py:0
#, python-format
msgid "The bill of loading is completed, but the node exception reason is %s!"
msgstr "提单已完成,但是节点异常,原因是:%s!"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/models/cc_bill_loading.py:0
......@@ -3465,6 +3608,15 @@ msgstr "节点 [%s] 序号小于现有节点序号。"
msgid "The pallet number can only be entered as a number!"
msgstr "托盘号只能输入数字!"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/wizard/batch_input_ship_package_statu_wizard.py:0
#, python-format
msgid ""
"The selected operation time exceeds %s hours, please confirm the operation "
"time again"
msgstr "所选操作时间超过 %s 小时,请重新确认操作时间"
#. module: ccs_base
#. odoo-python
#: code:addons/ccs_base/models/cc_bill_loading.py:0
......@@ -3505,6 +3657,11 @@ msgstr "本周提单"
msgid "This week ship package"
msgstr "本周小包"
#. module: ccs_base
#: model:ir.model.fields,field_description:ccs_base.field_batch_input_ship_package_status_wizard__time_warning
msgid "Time Warning"
msgstr "时间预警"
#. module: ccs_base
#: model_terms:ir.ui.view,arch_db:ccs_base.search_cc_bl_view
msgid "Today Deadline"
......@@ -3804,6 +3961,16 @@ msgstr "重量单位"
msgid "Whether to export in Big packages"
msgstr "是否分大包导出"
#. module: ccs_base
#: model:ir.model.fields,help:ccs_base.field_batch_get_pod_info_wizard__remove_specified_text
msgid "Whether to remove specified text from PDF files"
msgstr "是否涂抹PDF中的指定文字"
#. module: ccs_base
#: model:ir.model.fields,help:ccs_base.field_batch_get_pod_info_wizard__sync_last_mile_pod
msgid "Whether to sync last mile POD information"
msgstr "是否同步尾程POD信息"
#. module: ccs_base
#: model:ir.model.fields.selection,name:ccs_base.selection__export_bl_big_package_xlsx_wizard__select_type__yes
msgid "YES"
......
......@@ -16,3 +16,5 @@ from . import cc_history_package_good
from . import cc_history_ship_package
from . import cc_history_package_sync_log
from . import history_tt_api_log
......@@ -554,6 +554,9 @@ class CcClearanceFile(models.Model):
def action_sync(self):
pass
def search_clearance_file(self, bl_id, file_name):
    """Look up a clearance file by bill of lading and file name.

    Searches ``cc.clearance.file`` for records whose ``bl_id`` and
    ``file_name`` equal the given values and returns at most one match.
    """
    clearance_model = self.env['cc.clearance.file']
    domain = [
        ('bl_id', '=', bl_id),
        ('file_name', '=', file_name),
    ]
    return clearance_model.search(domain, limit=1)
# 创建一个业务对象,继承自models.Model, 用于管理业务数据.业务数据包括提单号、提单日期、提单总件数、提单总金额、所属客户、提单明细、清关进度明细、状态[待确认、清关中、已完成]
class CcBL(models.Model):
......@@ -647,7 +650,8 @@ class CcBL(models.Model):
big_package_qty = fields.Integer(string='Big Package Qty')
# 已提货大包数量
picked_up_big_package_qty = fields.Integer(string='Picked Up Big Package Qty', compute='cal_picked_up_big_package_qty',
picked_up_big_package_qty = fields.Integer(string='Picked Up Big Package Qty',
compute='cal_picked_up_big_package_qty',
store=True)
# 理货大包数量
tally_big_package_qty = fields.Integer(string='Tally Big Package Qty', compute='cal_tally_big_package_qty',
......@@ -779,6 +783,58 @@ class CcBL(models.Model):
# 定义清关国家,关联到国家字段
cc_country_id = fields.Many2one('res.country', string='CC Country')
# 新增尾程快递,many2many,cc.last.mile.provider
last_mile_provider_ids = fields.Many2many('cc.last.mile.provider', 'cc_bill_loading_last_mile_provider_rel',
'bl_id', 'last_mile_provider_id', string='Last Mile Providers',
compute='_compute_last_mile_provider_ids', store=True)
@api.depends('big_package_ids', 'big_package_ids.next_provider_name')
def _compute_last_mile_provider_ids(self):
    """Derive the last-mile providers of each bill of lading.

    Every non-empty line of a provider's ``matching_value`` is a keyword
    (compared lower-cased and stripped).  A provider is linked to a bill
    of lading when one of the ``next_provider_name`` values of its big
    packages equals one of that provider's keywords.
    """
    # Load all providers once and index their ids by normalised keyword,
    # so the per-record loop below only does dictionary lookups.
    ids_by_keyword = {}
    for provider in self.env['cc.last.mile.provider'].sudo().search([]):
        if not provider.matching_value:
            continue
        for raw_line in provider.matching_value.split('\n'):
            if not raw_line.strip():
                continue
            ids_by_keyword.setdefault(raw_line.lower().strip(), []).append(provider.id)

    for bl in self:
        # Start from an empty relation; it is only refilled on a match.
        bl.last_mile_provider_ids = [(6, 0, [])]
        if not bl.big_package_ids:
            continue

        names = bl.big_package_ids.filtered('next_provider_name').mapped('next_provider_name')
        if not names:
            continue

        # Sets de-duplicate both the names and the matched provider ids.
        matched_ids = set()
        for name in set(names):
            matched_ids.update(ids_by_keyword.get(name.lower().strip(), ()))

        if matched_ids:
            bl.last_mile_provider_ids = [(6, 0, list(matched_ids))]
def push_clear_customs_start(self, utc_time):
# 创建向导
push_node_obj = self.env['cc.node'].sudo().search(
......@@ -870,17 +926,17 @@ class CcBL(models.Model):
}
}
def check_is_done(self, is_email=False, **kwargs):
    """Move the bill of lading to done when all completion conditions hold.

    The conditions are: every small package's state is a done node, the
    customs clearance status is a done node, and ``is_bl_sync`` is set.
    When they hold, ``done_func`` is called with ``is_email`` and any
    extra keyword arguments.  Otherwise a ``ValidationError`` is raised,
    unless ``is_email`` is truthy, in which case nothing happens.

    NOTE: per the original author's remark, the tk module extends this method.
    """
    packages_done = all(line.state.is_done for line in self.ship_package_ids)
    if packages_done and self.customs_clearance_status.is_done and self.is_bl_sync:
        self.done_func(is_email, **kwargs)
        return

    if is_email:
        return

    # The small-package node or the bill-of-lading node is not a completed
    # node, so the bill of lading cannot be set to done.
    raise ValidationError(
        _('The small package node or bill of lading node is not in the completed node, and the bill of lading cannot be changed to completed!'))
def done_func(self, is_email=False,**kwargs):
def done_func(self, is_email=False, **kwargs):
"""
变为已完成
"""
......@@ -890,7 +946,7 @@ class CcBL(models.Model):
# 如果选择了忽略节点异常,则记录异常原因
if kwargs.get('exception_reason'):
exception_reason = kwargs['exception_reason']
#输出当前用户是什么语言
# 输出当前用户是什么语言
if self.env.user.lang == 'zh_CN':
message = '提单已完成,但存在节点异常,原因是:%s!' % exception_reason
else:
......@@ -1089,6 +1145,17 @@ class CcBL(models.Model):
'default_current_status': customs_clearance_status_list[0]}
}
def action_batch_get_pod_info(self):
    """Open the batch "get last-mile POD info" wizard for these records.

    Returns:
        dict: an ``ir.actions.act_window`` description that opens the
        ``batch.get.pod.info.wizard`` form in a dialog (``target='new'``).
    """
    return {
        'name': _('Batch Get POD Info'),
        'type': 'ir.actions.act_window',
        'view_mode': 'form',
        'res_model': 'batch.get.pod.info.wizard',
        'context': {
            # Unchanged from the original: the full id list is passed under
            # ``active_id``.  NOTE(review): the wizard presumably reads this
            # key -- confirm before removing it.
            'active_id': self.ids,
            # Also expose the list under the conventional plural key so
            # code that follows the Odoo convention sees every record.
            'active_ids': self.ids,
        },
        'target': 'new',
    }
# 增加一个清关进度的业务对象,继承自models.Model, 用于管理业务数据.业务数据包括提单号、清关节点(业务对象)、进度日期、进度描述、更新人
class CcProgress(models.Model):
......
......@@ -6,8 +6,7 @@ add_exception_info_wizard_group_user,add_exception_info_wizard_group_user,ccs_ba
update_bl_status_wizard_group_user,update_bl_status_wizard_group_user,ccs_base.model_update_bl_status_wizard,base.group_user,1,1,1,1
batch_update_transfer_bl_no_wizard_group_user,batch_update_transfer_bl_no_wizard_group_user,ccs_base.model_batch_update_transfer_bl_no_wizard,base.group_user,1,1,1,1
bl_done_wizard_group_user,bl_done_wizard_group_user,ccs_base.model_bl_done_wizard,base.group_user,1,1,1,1
batch_get_pod_info_wizard_group_user,batch_get_pod_info_wizard_group_user,ccs_base.model_batch_get_pod_info_wizard,base.group_user,1,1,1,1
access_group_user_common_common,access_group_user_common_common,model_common_common,base.group_user,1,1,1,1
......
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>PDF OCR文字识别删除工具</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf-lib/1.17.1/pdf-lib.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/tesseract.js/4.1.1/tesseract.min.js"></script>
<style>
body {
font-family: Arial, sans-serif;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
}
.container {
background: white;
border-radius: 20px;
padding: 30px;
margin: 20px 0;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
}
.header {
text-align: center;
margin-bottom: 30px;
padding: 20px;
background: linear-gradient(135deg, #9c27b0 0%, #673ab7 100%);
color: white;
border-radius: 15px;
}
.upload-area {
border: 3px dashed #9c27b0;
padding: 40px;
text-align: center;
border-radius: 15px;
background: #f8f9fa;
margin: 20px 0;
transition: all 0.3s ease;
cursor: pointer;
}
.upload-area:hover {
border-color: #007bff;
background: #e3f2fd;
}
.upload-area.dragover {
border-color: #007bff;
background: #e3f2fd;
transform: scale(1.02);
}
button {
background: linear-gradient(135deg, #9c27b0 0%, #673ab7 100%);
color: white;
padding: 15px 30px;
border: none;
border-radius: 10px;
cursor: pointer;
margin: 10px 5px;
font-size: 16px;
font-weight: bold;
transition: all 0.3s ease;
box-shadow: 0 4px 15px rgba(156,39,176,0.3);
}
button:hover {
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(156,39,176,0.4);
}
button:disabled {
background: #6c757d;
cursor: not-allowed;
transform: none;
box-shadow: none;
}
.success-btn {
background: linear-gradient(135deg, #28a745 0%, #20c997 100%);
}
.warning-btn {
background: linear-gradient(135deg, #ffc107 0%, #fd7e14 100%);
}
.info-btn {
background: linear-gradient(135deg, #17a2b8 0%, #138496 100%);
}
input[type="file"] {
display: none;
}
.result {
margin: 20px 0;
padding: 20px;
border-radius: 10px;
font-weight: bold;
font-size: 16px;
}
.success {
background: linear-gradient(135deg, #d4edda 0%, #c3e6cb 100%);
color: #155724;
border: 2px solid #c3e6cb;
}
.error {
background: linear-gradient(135deg, #f8d7da 0%, #f5c6cb 100%);
color: #721c24;
border: 2px solid #f5c6cb;
}
.info {
background: linear-gradient(135deg, #d1ecf1 0%, #bee5eb 100%);
color: #0c5460;
border: 2px solid #bee5eb;
}
.progress {
background: linear-gradient(135deg, #fff3cd 0%, #ffeaa7 100%);
color: #856404;
border: 2px solid #ffeaa7;
}
.strategy-options {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 15px;
margin: 20px 0;
}
.strategy-card {
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
border: 2px solid #e9ecef;
cursor: pointer;
transition: all 0.3s ease;
text-align: center;
}
.strategy-card:hover {
border-color: #9c27b0;
background: #f3e5f5;
}
.strategy-card.selected {
border-color: #9c27b0;
background: #f3e5f5;
}
.stats {
background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
padding: 20px;
border-radius: 10px;
margin: 15px 0;
border-left: 5px solid #2196f3;
}
.ocr-info {
background: linear-gradient(135deg, #f3e5f5 0%, #e1bee7 100%);
padding: 20px;
border-radius: 10px;
margin: 15px 0;
border: 2px solid #9c27b0;
}
.coordinate-info {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
margin: 10px 0;
font-family: monospace;
font-size: 12px;
border-left: 4px solid #9c27b0;
}
.text-detection {
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
margin: 15px 0;
border-left: 5px solid #9c27b0;
}
.text-item {
background: white;
padding: 15px;
margin: 10px 0;
border-radius: 8px;
border: 1px solid #ddd;
display: flex;
justify-content: space-between;
align-items: center;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.text-item.found {
background: #d4edda;
border-color: #28a745;
}
.text-item.not-found {
background: #f8d7da;
border-color: #dc3545;
}
.coordinate-info {
font-size: 12px;
color: #666;
font-family: monospace;
}
.detected-text {
padding: 5px 10px;
border-radius: 5px;
font-weight: bold;
color: white;
}
.detected-text.agn {
background: #ff9800;
}
.detected-text.uclink {
background: #f44336;
}
</style>
</head>
<body>
<div class="header">
<h1>🔍 PDF OCR文字识别删除工具</h1>
<p>使用OCR技术识别扫描件PDF中的文字,然后精确删除</p>
<p>支持扫描件PDF的文字识别和智能删除</p>
</div>
<div class="container">
<h3>📁 上传PDF文件</h3>
<div class="upload-area" id="uploadArea">
<h4>📄 拖拽PDF文件到这里</h4>
<p>或点击下方按钮选择文件</p>
<input type="file" id="pdfInput" accept=".pdf">
<button id="selectFileBtn">选择PDF文件</button>
</div>
<div id="fileInfo" style="display: none;">
<div class="stats">
<h4>📋 文件信息</h4>
<p id="fileName"></p>
<p id="fileSize"></p>
<p id="pageCount"></p>
</div>
</div>
</div>
<div class="container">
<h3>🎯 OCR识别策略</h3>
<div class="ocr-info">
<h4>🔍 OCR文字识别说明</h4>
<p>扫描件PDF的文字位置各不相同,必须使用OCR技术智能识别文字位置,然后根据识别结果精确删除目标文字。</p>
<p><strong>为什么需要OCR:</strong>扫描件PDF中的文字是图像,无法直接获取坐标,必须通过OCR识别才能准确定位。</p>
</div>
<div class="strategy-options">
<div class="strategy-card selected" onclick="selectStrategy('ocr')">
<h4>🔍 OCR识别策略(推荐)</h4>
<p>智能识别扫描件文字位置</p>
<small>最精确,适应不同PDF布局</small>
</div>
<div class="strategy-card" onclick="selectStrategy('hybrid')">
<h4>🔄 混合策略</h4>
<p>OCR识别 + 预设坐标</p>
<small>双重保障,确保识别成功</small>
</div>
<div class="strategy-card" onclick="selectStrategy('fallback')">
<h4>🛡️ 回退策略</h4>
<p>OCR失败时使用预设坐标</p>
<small>最后的安全网</small>
</div>
</div>
<div style="text-align: center; margin: 20px 0;">
<button id="processBtn" onclick="processPDF()" disabled class="success-btn">🔍 开始OCR识别与删除</button>
<button id="fallbackBtn" onclick="selectStrategy('fallback'); processPDF();" disabled class="info-btn" style="display: none;">🛡️ OCR卡住?使用回退策略</button>
<button onclick="resetForm()" class="warning-btn">🔄 重置</button>
</div>
<div class="container">
<h3>🎯 删除精度设置</h3>
<div class="info">
<h4>📏 删除范围调整:</h4>
<p>当前设置:<strong id="precisionText">超精确模式(±0像素)</strong></p>
<input type="range" id="precisionSlider" min="0" max="5" value="0" style="width: 100%; margin: 10px 0;">
<div style="display: flex; justify-content: space-between; font-size: 12px; color: #666;">
<span>最精确</span>
<span>最安全</span>
</div>
<p><small>调整滑块可以控制删除范围的大小。超精确模式只删除文字本身,不扩展任何范围。</small></p>
<div style="margin-top: 10px;">
<button onclick="setPrecision(0)" style="background: #28a745; color: white; padding: 5px 10px; border: none; border-radius: 3px; margin: 2px;">超精确模式</button>
<button onclick="setPrecision(1)" style="background: #17a2b8; color: white; padding: 5px 10px; border: none; border-radius: 3px; margin: 2px;">精确模式</button>
<button onclick="setPrecision(2)" style="background: #ffc107; color: black; padding: 5px 10px; border: none; border-radius: 3px; margin: 2px;">平衡模式</button>
</div>
</div>
</div>
<div class="container">
<h3>📄 多页处理设置</h3>
<div class="info">
<h4>🔧 页码处理:</h4>
<p>✅ 自动排除页码文字(如"Page 1 of 1"、"2/30"等)</p>
<p>✅ 智能识别页码模式,避免误删</p>
<p>✅ 多页进度显示,实时更新处理状态</p>
<p><small>对于多页PDF,工具会自动识别并排除页码相关的文字,确保只删除目标内容。</small></p>
</div>
</div>
<div id="result"></div>
</div>
<div class="container">
<h3>📊 OCR识别结果</h3>
<div id="ocrResults"></div>
</div>
<div class="container">
<h3>📋 使用说明</h3>
<div class="info">
<h4>🎯 OCR处理流程:</h4>
<ol>
<li><strong>上传文件:</strong>选择您的扫描件PDF文件</li>
<li><strong>选择策略:</strong>选择OCR识别策略(推荐OCR识别)</li>
<li><strong>开始处理:</strong>点击"开始OCR识别与删除"按钮</li>
<li><strong>等待识别:</strong>OCR需要时间识别文字(可能需要几分钟)</li>
<li><strong>查看结果:</strong>查看识别到的文字位置和坐标</li>
<li><strong>下载文件:</strong>获取处理后的PDF文件</li>
</ol>
<h4>💡 为什么必须使用OCR:</h4>
<ul>
<li><strong>扫描件特性:</strong>扫描件PDF中的文字是图像,不是可选择的文本</li>
<li><strong>位置不固定:</strong>每个PDF的文字位置都不同,无法使用固定坐标</li>
<li><strong>精确识别:</strong>只有OCR才能准确识别文字的具体位置</li>
<li><strong>智能适应:</strong>OCR可以适应不同的PDF布局和格式</li>
</ul>
<h4>✅ OCR识别优势:</h4>
<ul>
<li>智能识别文字位置,适应不同PDF布局</li>
<li>精确删除目标文字,不会误删其他内容</li>
<li>支持扫描件PDF处理</li>
<li>显示所有识别到的文字,便于验证</li>
<li>处理所有页面,确保完整删除</li>
</ul>
<h4>⚠️ 注意事项:</h4>
<ul>
<li>OCR识别需要时间,请耐心等待</li>
<li>识别精度取决于PDF质量和文字清晰度</li>
<li>如果OCR失败,会自动使用预设坐标</li>
<li>建议使用高质量的扫描件PDF</li>
<li><strong>OCR是处理扫描件的唯一有效方法</strong></li>
<li>固定坐标无法适应不同PDF的文字位置</li>
</ul>
</div>
</div>
<script>
// Tell pdf.js where to load its worker script from (same CDN and version as the library).
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
// Currently loaded document as parsed by pdf-lib (null until a file is loaded).
let pdfDoc = null;
// The same document as parsed by pdf.js (null until a file is loaded).
let pdfjsDocument = null;
// Key of the selected processing strategy: 'ocr', 'hybrid' or 'fallback'.
let selectedStrategy = 'ocr';
// Cached references to the DOM elements used by the functions below.
let fileInput = document.getElementById('pdfInput');
let processBtn = document.getElementById('processBtn');
let resultDiv = document.getElementById('result');
let ocrResultsDiv = document.getElementById('ocrResults');
// Target strings -- presumably the texts looked up in the OCR output and removed; confirm against the OCR code.
const TARGET_TEXTS = ['AGN', 'UCLINK LOGISITICS LTD', 'UCLINK LOGISITICS', 'UCLINK', 'LOGISITICS', 'LOGISTICS', 'LTD'];
// Texts that must NOT be deleted.
const EXCLUDE_TEXTS = ['AIR EQK', 'ARN', 'EQK', 'AIR', 'Page 1 of 1', 'Page 2 of 2', 'Page 3 of 3', 'Page 4 of 4', 'Page 5 of 5'];
// Deletion precision setting (index into the precision modes, 0-5).
let deletePrecision = 0; // defaults to ultra-precise mode
// Wire up file selection, drag-and-drop and the precision slider once the DOM is ready.
document.addEventListener('DOMContentLoaded', function() {
    console.log('页面加载完成');

    // Look the elements up directly and bind the handlers.
    const fileInput = document.getElementById('pdfInput');
    const uploadArea = document.getElementById('uploadArea');
    const selectBtn = document.getElementById('selectFileBtn');
    const precisionSlider = document.getElementById('precisionSlider');
    const precisionText = document.getElementById('precisionText');

    console.log('元素检查:', {
        fileInput: !!fileInput,
        uploadArea: !!uploadArea,
        selectBtn: !!selectBtn,
        precisionSlider: !!precisionSlider
    });

    // Precision slider: store the chosen level and show its label.
    if (precisionSlider) {
        precisionSlider.addEventListener('input', function() {
            // Explicit radix so the slider value is always parsed as decimal.
            deletePrecision = parseInt(this.value, 10);
            const modes = ['超精确模式(±0像素)', '精确模式(±1像素)', '平衡模式(±2像素)', '安全模式(±3像素)', '宽松模式(±4像素)', '最安全模式(±5像素)'];
            precisionText.textContent = modes[deletePrecision];
        });
    }

    // Clicking anywhere in the upload area opens the file picker.
    uploadArea.onclick = function() {
        console.log('点击上传区域');
        fileInput.click();
    };

    // Clicking the select button opens the file picker.
    selectBtn.onclick = function(e) {
        e.preventDefault();
        // The button sits inside the upload area; without this the click
        // would bubble to uploadArea.onclick and request the picker twice.
        e.stopPropagation();
        console.log('点击选择按钮');
        fileInput.click();
    };

    // A file was chosen through the picker.
    fileInput.onchange = function(e) {
        console.log('文件选择事件');
        handleFileSelect();
    };

    // Drag-and-drop handling.
    uploadArea.ondragover = function(e) {
        // preventDefault is required for the drop event to fire.
        e.preventDefault();
        uploadArea.classList.add('dragover');
    };

    uploadArea.ondragleave = function() {
        uploadArea.classList.remove('dragover');
    };

    uploadArea.ondrop = function(e) {
        e.preventDefault();
        uploadArea.classList.remove('dragover');
        const files = e.dataTransfer.files;
        if (files.length > 0) {
            // Hand the dropped files to the input so handleFileSelect reads
            // them the same way as files chosen through the picker.
            fileInput.files = files;
            handleFileSelect();
        }
    };
});
/**
 * Load the file currently selected in the file input into pdf-lib and pdf.js.
 *
 * Shows the file name and size, then the page count once loading succeeds.
 * The process button is enabled only after both libraries have parsed the
 * file; on failure the previously loaded document is discarded and the
 * button stays disabled.
 */
async function handleFileSelect() {
    console.log('文件选择事件触发');
    const file = fileInput.files[0];
    if (!file) {
        console.log('没有选择文件');
        return;
    }
    console.log('选择的文件:', file.name, file.size);

    // Show the file information.
    document.getElementById('fileName').textContent = `文件名: ${file.name}`;
    document.getElementById('fileSize').textContent = `文件大小: ${(file.size / 1024).toFixed(2)} KB`;
    document.getElementById('pageCount').textContent = `页面数: 加载中...`;
    document.getElementById('fileInfo').style.display = 'block';
    resultDiv.innerHTML = '<div class="progress">📄 正在加载PDF文件...</div>';
    ocrResultsDiv.innerHTML = '';
    processBtn.disabled = true;

    // Drop any previously loaded document so a failed load can never leave
    // the old file behind while the UI shows the new file's name.
    pdfDoc = null;
    pdfjsDocument = null;

    try {
        const pdfBytes = await file.arrayBuffer();
        const loadedPdfLibDoc = await PDFLib.PDFDocument.load(pdfBytes);
        // Give pdf.js its own copy: it may transfer (detach) the buffer it
        // receives, and pdf-lib can still reference the original bytes.
        const loadedPdfjsDoc = await pdfjsLib.getDocument({ data: pdfBytes.slice(0) }).promise;

        // Publish both documents only after both loads succeeded.
        pdfDoc = loadedPdfLibDoc;
        pdfjsDocument = loadedPdfjsDoc;

        const pageCount = pdfDoc.getPageCount();
        document.getElementById('pageCount').textContent = `页面数: ${pageCount}`;
        resultDiv.innerHTML = '<div class="success">✅ PDF文件加载成功,可以开始处理</div>';
        processBtn.disabled = false;
        console.log('PDF加载成功,页面数:', pageCount);
    } catch (error) {
        document.getElementById('pageCount').textContent = `页面数: 加载失败`;
        // Build the error box with textContent so the message is never
        // interpreted as HTML.
        const errorBox = document.createElement('div');
        errorBox.className = 'error';
        errorBox.textContent = `❌ PDF加载失败: ${error.message}`;
        resultDiv.innerHTML = '';
        resultDiv.appendChild(errorBox);
        // Nothing is loaded, so processing must stay unavailable.
        processBtn.disabled = true;
        console.error('PDF加载失败:', error);
    }
}
/**
 * Make `strategy` the active processing strategy and highlight its card.
 *
 * The function is called both from clicks on a `.strategy-card` and from the
 * "use fallback" buttons rendered into the result area. In the second case
 * the click target is not inside any card, so the card lookup must tolerate
 * a miss instead of throwing (which previously aborted the chained
 * `processPDF()` call of those buttons).
 *
 * @param {string} strategy - 'ocr', 'hybrid' or 'fallback'.
 */
function selectStrategy(strategy) {
    selectedStrategy = strategy;

    const cards = Array.from(document.querySelectorAll('.strategy-card'));

    // Prefer the card that was actually clicked, when there is one.
    const trigger = window.event && window.event.target;
    let activeCard = trigger && typeof trigger.closest === 'function'
        ? trigger.closest('.strategy-card')
        : null;

    if (!activeCard) {
        // Best effort for programmatic calls: pick the card whose inline click
        // handler names this strategy. NOTE(review): assumes the cards use
        // onclick="selectStrategy('...')"; a miss simply leaves no card
        // highlighted.
        activeCard = cards.find(card =>
            (card.getAttribute('onclick') || '').includes(`'${strategy}'`)
        ) || null;
    }

    // Update the UI
    cards.forEach(card => card.classList.remove('selected'));
    if (activeCard) {
        activeCard.classList.add('selected');
    }

    resultDiv.innerHTML = `<div class="info">✅ 已选择策略: ${getStrategyName(strategy)}</div>`;
}
/**
 * Return the display name of a strategy key.
 *
 * Unknown keys are returned unchanged. Only the table's own entries are
 * consulted, so keys such as 'constructor' or 'toString' do not resolve to
 * members inherited from Object.prototype.
 *
 * @param {string} strategy - Strategy key ('ocr', 'hybrid', 'fallback').
 * @returns {string} Display name, or `strategy` itself when it is not known.
 */
function getStrategyName(strategy) {
    const names = {
        'ocr': 'OCR识别策略',
        'hybrid': '混合策略',
        'fallback': '回退策略'
    };
    if (Object.prototype.hasOwnProperty.call(names, strategy)) {
        return names[strategy];
    }
    return strategy;
}
/**
 * Run the currently selected strategy against the loaded PDF.
 *
 * Refuses to start when no document is loaded. While a strategy runs the
 * process button is disabled; it is re-enabled afterwards whether the run
 * succeeded or not. An unknown strategy key runs nothing.
 */
async function processPDF() {
    const documentsReady = Boolean(pdfDoc) && Boolean(pdfjsDocument);
    if (!documentsReady) {
        resultDiv.innerHTML = '<div class="error">请先上传PDF文件</div>';
        return;
    }

    try {
        resultDiv.innerHTML = '<div class="progress">🔍 正在启动OCR识别,请稍候...</div>';
        processBtn.disabled = true;

        switch (selectedStrategy) {
            case 'ocr':
                await processOCR();
                break;
            case 'hybrid':
                await processHybrid();
                break;
            case 'fallback':
                await processFallback();
                break;
            default:
                break;
        }
    } catch (error) {
        resultDiv.innerHTML = `<div class="error">❌ 处理失败: ${error.message}</div>`;
        console.error('处理错误:', error);
    } finally {
        processBtn.disabled = false;
    }
}
/**
 * OCR strategy: find the target words on every page and paint over them.
 *
 * For each page the pdf.js document is rendered to a canvas at 2x scale and
 * recognised with Tesseract; `findTargetTexts` picks the words to remove and
 * converts their boxes to page coordinates. After all pages are scanned the
 * matches are covered with white rectangles on the pdf-lib document, grown
 * by `deletePrecision` on every side, and the result is downloaded.
 *
 * A page whose OCR fails is logged and skipped. Any other failure is caught
 * here and rendered together with a button that switches to the fallback
 * strategy, so this function does not reject for those errors.
 */
async function processOCR() {
    const pages = pdfDoc.getPages();
    let processedPages = 0;
    let totalRectangles = 0;
    let detectedTexts = [];
    let allRecognizedTexts = []; // every word recognised on any page

    // OCR output and error messages end up in innerHTML; neutralise markup.
    const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => ({
        '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }[ch]));

    // "Page X of Y" and "X/Y" style page numbers.
    const looksLikePageNumber = raw => {
        const text = raw.trim();
        return /^PAGE\s+\d+\s+OF\s+\d+$/i.test(text) || /^\d+\s*\/\s*\d+$/.test(text);
    };

    // Progress labels for the engine start-up stages reported by Tesseract.
    const startupStages = new Map([
        ['loading tesseract core', '正在加载OCR引擎'],
        ['initializing tesseract', '正在初始化OCR'],
        ['loading language traineddata', '正在加载语言包'],
        ['initializing api', '正在初始化API']
    ]);

    // Recognition parameters, tuned towards short words and single letters.
    const ocrParameters = {
        tessedit_pageseg_mode: '6', // single uniform text block
        tessedit_ocr_engine_mode: '1', // LSTM engine
        tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,- ', // restrict the character set
        preserve_interword_spaces: '1', // keep spacing between words
        tessedit_do_invert: '0', // do not invert the image
        textord_min_linesize: '1.0', // lower minimum line size, to catch smaller text
        classify_bln_numeric_mode: '0', // numeric mode off
        textord_force_make_prop_words: 'F', // do not force proportional words
        textord_min_xheight: '8', // minimum character height
        textord_tabfind_show_vlines: '0' // do not show vertical lines
    };

    try {
        // Offer the fallback button while the (slow) OCR is running
        const fallbackBtn = document.getElementById('fallbackBtn');
        fallbackBtn.style.display = 'inline-block';
        fallbackBtn.disabled = false;

        // Recognise every page
        for (let pageNum = 0; pageNum < pdfjsDocument.numPages; pageNum++) {
            resultDiv.innerHTML = `<div class="progress">🔍 正在OCR识别第 ${pageNum + 1}/${pdfjsDocument.numPages} 页... (已找到 ${detectedTexts.length} 个目标文字)</div>`;

            let canvas = null;
            try {
                const pdfjsPage = await pdfjsDocument.getPage(pageNum + 1);
                const viewport = pdfjsPage.getViewport({ scale: 2.0 }); // higher resolution for OCR

                // Render the page into an off-screen canvas
                canvas = document.createElement('canvas');
                const context = canvas.getContext('2d');
                canvas.height = viewport.height;
                canvas.width = viewport.width;
                await pdfjsPage.render({ canvasContext: context, viewport: viewport }).promise;

                if (pageNum === 0) {
                    resultDiv.innerHTML = `<div class="progress">🔍 正在启动OCR引擎,请稍候...</div>`;
                }

                const recognizeOptions = Object.assign({
                    logger: m => {
                        console.log('OCR进度:', m);
                        const percent = Math.round(m.progress * 100);
                        if (startupStages.has(m.status)) {
                            resultDiv.innerHTML = `<div class="progress">🔍 ${startupStages.get(m.status)}... ${percent}%</div>`;
                        } else if (m.status === 'recognizing text') {
                            resultDiv.innerHTML = `<div class="progress">🔍 正在识别第 ${pageNum + 1} 页文字... ${percent}%</div>`;
                        }
                    }
                }, ocrParameters);

                const { data: { text, words } } = await Tesseract.recognize(canvas, 'eng', recognizeOptions);

                // Remember every recognised word
                const pageRecognizedTexts = words.map(word => ({
                    text: word.text,
                    confidence: word.confidence,
                    bbox: word.bbox,
                    page: pageNum
                }));
                allRecognizedTexts = allRecognizedTexts.concat(pageRecognizedTexts);

                // Page size in PDF units, needed to convert the OCR boxes
                const page = pages[pageNum];
                const { width: pageWidth, height: pageHeight } = page.getSize();

                // Pick the words that have to be removed
                const pageTexts = findTargetTexts(words, pageNum, viewport.width, viewport.height, pageWidth, pageHeight);
                detectedTexts = detectedTexts.concat(pageTexts);
                resultDiv.innerHTML = `<div class="progress">🔍 第 ${pageNum + 1} 页OCR完成,找到 ${pageTexts.length} 个目标文字</div>`;
            } catch (error) {
                console.warn(`第 ${pageNum + 1} 页OCR失败:`, error);
                resultDiv.innerHTML = `<div class="progress">⚠️ 第 ${pageNum + 1} 页OCR失败,继续处理其他页面...</div>`;
            } finally {
                // Shrinking the canvas lets the browser drop its 2x-scale
                // backing store right away instead of keeping one per page.
                if (canvas) {
                    canvas.width = 0;
                    canvas.height = 0;
                }
            }
        }

        // OCR is over: hide the fallback button again
        fallbackBtn.style.display = 'none';

        // Show everything that was recognised, then the matches
        displayAllRecognizedTexts(allRecognizedTexts);
        displayOCRResults(detectedTexts);

        // Report the words protected by the exclusion list
        const excludedWords = allRecognizedTexts.filter(word => {
            const text = word.text.trim().toUpperCase();
            return EXCLUDE_TEXTS.some(excludeText => {
                const excludeUpper = excludeText.toUpperCase();
                return text.includes(excludeUpper) || excludeUpper.includes(text);
            });
        });
        if (excludedWords.length > 0) {
            console.log('已排除的文字:', excludedWords.map(w => w.text));
            resultDiv.innerHTML += `<div class="info">🛡️ 已排除 ${excludedWords.length} 个不应删除的文字: ${excludedWords.map(w => escapeHtml(w.text)).join(', ')}</div>`;
        }

        // Warn when "LTD" was not found at all
        const hasLTD = detectedTexts.some(text => text.text === 'LTD');
        if (!hasLTD) {
            console.log('⚠️ 未识别到LTD,可能需要调整OCR参数');
            resultDiv.innerHTML += `<div class="warning">⚠️ 未识别到"LTD"文字,如果PDF中确实存在,请尝试调整OCR参数或使用回退策略</div>`;
        }

        // Page-number statistics
        const pageNumbers = allRecognizedTexts.filter(word => looksLikePageNumber(word.text));
        if (pageNumbers.length > 0) {
            console.log('检测到页码:', pageNumbers.map(p => p.text));
            resultDiv.innerHTML += `<div class="info">📄 检测到 ${pageNumbers.length} 个页码,已自动排除</div>`;
        }

        // Cover the matches, page by page
        if (detectedTexts.length > 0) {
            const textsByPage = {};
            detectedTexts.forEach(text => {
                if (!textsByPage[text.page]) {
                    textsByPage[text.page] = [];
                }
                textsByPage[text.page].push(text);
            });

            Object.keys(textsByPage).forEach(pageNum => {
                const page = pages[pageNum];
                textsByPage[pageNum].forEach(text => {
                    let rect;
                    if (deletePrecision === 0) {
                        // Exact mode: cover the word box only
                        rect = {
                            x: text.x,
                            y: text.y,
                            width: text.width,
                            height: text.height
                        };
                    } else {
                        // Other modes: grow the box by the precision on every side
                        rect = {
                            x: Math.max(0, text.x - deletePrecision),
                            y: Math.max(0, text.y - deletePrecision),
                            width: text.width + (deletePrecision * 2),
                            height: text.height + (deletePrecision * 2)
                        };
                    }
                    page.drawRectangle({
                        x: rect.x,
                        y: rect.y,
                        width: rect.width,
                        height: rect.height,
                        color: PDFLib.rgb(1, 1, 1) // white cover
                    });
                    totalRectangles++;
                });
                processedPages++;
            });

            // The recognised words are handed over explicitly: they are local
            // to this function and the summary needs them for its page-number
            // count. Callees that take only three arguments ignore the extra one.
            await saveAndDownload(processedPages, totalRectangles, 'ocr_processed', allRecognizedTexts);
        } else {
            resultDiv.innerHTML = `
<div class="warning">
⚠️ OCR未识别到目标文字<br>
<button onclick="selectStrategy('fallback'); processPDF();" class="success-btn" style="margin-top: 10px;">
🛡️ 使用回退策略
</button>
</div>
`;
        }
    } catch (error) {
        resultDiv.innerHTML = `
<div class="error">
❌ OCR识别失败: ${escapeHtml(error.message)}<br>
<button onclick="selectStrategy('fallback'); processPDF();" class="success-btn" style="margin-top: 10px;">
🛡️ 使用回退策略
</button>
</div>
`;
    }
}
/**
 * Select the OCR words that must be removed and convert their boxes from
 * canvas pixels (origin top-left) to PDF page units (origin bottom-left).
 *
 * A word is skipped when it overlaps an entry of EXCLUDE_TEXTS (either one
 * contains the other) or looks like a page number. Otherwise it is compared
 * with TARGET_TEXTS: 'AGN' and 'LTD' must match exactly, every other target
 * matches when the word contains it (and none of AIR/EQK/ARN) or when
 * `fuzzyMatch` accepts it.
 *
 * Each word is reported at most once, for the first target it matches.
 * Reporting it once per matching target only produced several entries with
 * the same box, i.e. duplicate rectangles and inflated counts.
 *
 * @param {Array} words - Tesseract words ({ text, bbox, confidence }).
 * @param {number} pageNum - Zero-based page index stored on each result.
 * @param {number} viewportWidth - Width of the rendered canvas in pixels.
 * @param {number} viewportHeight - Height of the rendered canvas in pixels.
 * @param {number} pageWidth - Page width in PDF units.
 * @param {number} pageHeight - Page height in PDF units.
 * @returns {Array} Matches: { text, fullText, page, x, y, width, height, confidence, type }.
 */
function findTargetTexts(words, pageNum, viewportWidth, viewportHeight, pageWidth, pageHeight) {
    const foundTexts = [];

    // The canvas-to-page scale is the same for every word of the page.
    const scaleX = pageWidth / viewportWidth;
    const scaleY = pageHeight / viewportHeight;

    const matchesTarget = (text, targetText) => {
        if (targetText === 'AGN' || targetText === 'LTD') {
            // Short targets: exact match only
            return text === targetText;
        }
        const targetUpper = targetText.toUpperCase();
        const containsTarget = text.includes(targetUpper) &&
            !text.includes('AIR') &&
            !text.includes('EQK') &&
            !text.includes('ARN');
        // Fall back to fuzzy matching when the strict test fails
        return containsTarget || fuzzyMatch(text, targetUpper);
    };

    words.forEach(word => {
        const text = word.text.trim().toUpperCase();

        // Exclusion list first
        let isExcluded = EXCLUDE_TEXTS.some(excludeText => {
            const excludeUpper = excludeText.toUpperCase();
            return text.includes(excludeUpper) || excludeUpper.includes(text);
        });
        // "Page X of Y"
        if (!isExcluded && text.match(/^PAGE\s+\d+\s+OF\s+\d+$/i)) {
            isExcluded = true;
            console.log('排除页码:', word.text);
        }
        // "X / Y"
        if (!isExcluded && text.match(/^\d+\s*\/\s*\d+$/)) {
            isExcluded = true;
            console.log('排除页码数字:', word.text);
        }
        if (isExcluded) {
            console.log('排除文字:', word.text);
            return; // skip this word
        }

        const targetText = TARGET_TEXTS.find(candidate => matchesTarget(text, candidate));
        if (targetText === undefined) {
            return;
        }

        // Scale the box and flip the Y axis: the PDF origin is bottom-left,
        // so the rectangle's y is measured from the bottom edge of the box.
        const convertedX = word.bbox.x0 * scaleX;
        const convertedY = (viewportHeight - word.bbox.y1) * scaleY;
        const convertedWidth = (word.bbox.x1 - word.bbox.x0) * scaleX;
        const convertedHeight = (word.bbox.y1 - word.bbox.y0) * scaleY;

        // Debug output
        console.log('找到目标文字:', {
            originalText: word.text,
            matchedText: targetText,
            fullText: text,
            originalBbox: word.bbox,
            viewportSize: { width: viewportWidth, height: viewportHeight },
            pageSize: { width: pageWidth, height: pageHeight },
            scaleFactors: { scaleX, scaleY },
            convertedCoords: { x: convertedX, y: convertedY, width: convertedWidth, height: convertedHeight }
        });

        foundTexts.push({
            text: targetText,
            fullText: word.text,
            page: pageNum,
            x: convertedX,
            y: convertedY,
            width: convertedWidth,
            height: convertedHeight,
            confidence: word.confidence / 100,
            type: targetText.includes('AGN') ? 'agn' : 'uclink'
        });
    });

    return foundTexts;
}
/**
 * Approximate comparison of two upper-case strings.
 *
 * Everything except A-Z is dropped from both inputs. The strings match when
 * their edit distance is at most 30% of the longer one's length, with a
 * minimum allowance of one edit. An input without any letter never matches.
 *
 * @param {string} str1 - First string (expected upper-case).
 * @param {string} str2 - Second string (expected upper-case).
 * @returns {boolean} True when the strings are close enough.
 */
function fuzzyMatch(str1, str2) {
    const lettersOnly = value => value.replace(/[^A-Z]/g, '');
    const first = lettersOnly(str1);
    const second = lettersOnly(str2);

    if (!first.length || !second.length) {
        return false;
    }

    const firstIsLonger = first.length > second.length;
    const longer = firstIsLonger ? first : second;
    const shorter = firstIsLonger ? second : first;

    const allowedEdits = Math.max(1, longer.length * 0.3);
    return levenshteinDistance(longer, shorter) <= allowedEdits;
}
/**
 * Levenshtein edit distance between two strings: the number of single
 * character insertions, deletions and substitutions needed to turn one into
 * the other.
 *
 * Only two rows of the dynamic-programming table are kept at a time.
 *
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {number} Edit distance.
 */
function levenshteinDistance(str1, str2) {
    // Row 0: distance from the empty prefix of str2 to each prefix of str1.
    let previousRow = Array.from({ length: str1.length + 1 }, (_, column) => column);

    for (let row = 1; row <= str2.length; row++) {
        const currentRow = [row];
        const rowChar = str2.charAt(row - 1);

        for (let column = 1; column <= str1.length; column++) {
            if (rowChar === str1.charAt(column - 1)) {
                currentRow[column] = previousRow[column - 1];
            } else {
                currentRow[column] = Math.min(
                    previousRow[column - 1] + 1, // substitution
                    currentRow[column - 1] + 1,  // insertion
                    previousRow[column] + 1      // deletion
                );
            }
        }
        previousRow = currentRow;
    }

    return previousRow[str1.length];
}
/**
 * Render the recognised words (the 50 with the highest confidence) in front
 * of whatever the OCR result area already shows.
 *
 * The input array is not modified: sorting happens on a copy, so callers
 * keep their words in recognition order. Word text comes from OCR of an
 * arbitrary document and is HTML-escaped before being inserted.
 *
 * @param {Array} allRecognizedTexts - Words of all pages ({ text, confidence, bbox, page }).
 */
function displayAllRecognizedTexts(allRecognizedTexts) {
    if (allRecognizedTexts.length === 0) {
        return;
    }

    const MAX_SHOWN = 50;
    const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => ({
        '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }[ch]));

    // The list spans every page, so the heading reports how many pages
    // contributed words instead of always claiming "page 1".
    const pageCount = new Set(allRecognizedTexts.map(word => word.page)).size;

    let html = '<h4>📝 所有识别到的文字:</h4>';
    html += `<div class="text-detection">`;
    html += `<h5>📄 共 ${pageCount} 页 (OCR识别到 ${allRecognizedTexts.length} 个文字)</h5>`;

    // Highest confidence first, limited to the first 50 words
    const sortedTexts = allRecognizedTexts
        .slice()
        .sort((a, b) => b.confidence - a.confidence)
        .slice(0, MAX_SHOWN);

    sortedTexts.forEach(word => {
        const isTarget = TARGET_TEXTS.some(target => word.text.includes(target));
        const badgeClass = isTarget ? (word.text.includes('AGN') ? 'agn' : 'uclink') : '';
        const safeText = escapeHtml(word.text);
        html += `
<div class="text-item ${isTarget ? 'found' : ''}">
<div>
<strong>${safeText}</strong>
<div class="coordinate-info">
坐标: X=${word.bbox.x0.toFixed(1)}, Y=${word.bbox.y0.toFixed(1)} |
大小: ${(word.bbox.x1 - word.bbox.x0).toFixed(1)} x ${(word.bbox.y1 - word.bbox.y0).toFixed(1)} |
置信度: ${word.confidence.toFixed(0)}%
</div>
</div>
<div>
<span class="detected-text ${badgeClass}">${safeText}</span>
</div>
</div>
`;
    });

    if (allRecognizedTexts.length > MAX_SHOWN) {
        html += `<div class="info">... 还有 ${allRecognizedTexts.length - MAX_SHOWN} 个文字未显示</div>`;
    }
    html += `</div>`;

    // Insert in front of the existing OCR results
    ocrResultsDiv.innerHTML = html + ocrResultsDiv.innerHTML;
}
/**
 * Append the list of detected target words, grouped by page, followed by a
 * summary block, to the OCR result area. With no detections only a warning
 * is appended.
 *
 * @param {Array} detectedTexts - Matches ({ text, page, x, y, width, height, confidence, type }).
 */
function displayOCRResults(detectedTexts) {
    if (detectedTexts.length === 0) {
        ocrResultsDiv.innerHTML += '<div class="warning">⚠️ OCR未识别到目标文字</div>';
        return;
    }

    // Group the matches by page index
    const textsByPage = detectedTexts.reduce((groups, entry) => {
        (groups[entry.page] = groups[entry.page] || []).push(entry);
        return groups;
    }, {});

    const renderItem = text => `
<div class="text-item found">
<div>
<strong>${text.text}</strong>
<div class="coordinate-info">
坐标: X=${text.x.toFixed(1)}, Y=${text.y.toFixed(1)} |
大小: ${text.width.toFixed(1)} x ${text.height.toFixed(1)} |
置信度: ${(text.confidence * 100).toFixed(0)}%
</div>
</div>
<div>
<span class="detected-text ${text.type}">${text.text}</span>
</div>
</div>
`;

    const renderPage = pageNum => {
        const pageTexts = textsByPage[pageNum];
        return `<div class="text-detection">` +
            `<h5>📄 第 ${parseInt(pageNum) + 1} 页 (找到 ${pageTexts.length} 个目标文字)</h5>` +
            pageTexts.map(renderItem).join('') +
            `</div>`;
    };

    const countOfType = type => detectedTexts.filter(entry => entry.type === type).length;
    const agnCount = countOfType('agn');
    const uclinkCount = countOfType('uclink');

    const orderedPages = Object.keys(textsByPage).sort((a, b) => parseInt(a) - parseInt(b));

    let html = '<h4>🎯 目标文字识别结果:</h4>';
    html += orderedPages.map(renderPage).join('');
    html += `
<div class="stats">
<h4>📊 目标文字识别统计</h4>
<p>AGN 文字: ${agnCount} 个</p>
<p>UCLINK LOGISITICS LTD 文字: ${uclinkCount} 个</p>
<p>总识别数: ${detectedTexts.length} 个</p>
<p>涉及页面: ${Object.keys(textsByPage).length} 页</p>
</div>
`;

    ocrResultsDiv.innerHTML += html;
}
/**
 * Hybrid strategy: run the OCR strategy and, if it rejects, cover the
 * preset coordinates instead.
 *
 * NOTE(review): processOCR wraps its whole body in try/catch and renders its
 * own error panel, so it does not appear to reject for OCR failures; the
 * fallback branch below may therefore never run. Confirm the intended
 * behaviour.
 */
async function processHybrid() {
    await processOCR().catch(async () => {
        resultDiv.innerHTML = '<div class="progress">🔄 OCR失败,使用预设坐标...</div>';
        await processFallback();
    });
}
/**
 * Fallback strategy: without any recognition, paint three white rectangles
 * at fixed positions (measured down from the top edge) on every page, then
 * download the result.
 */
async function processFallback() {
    const pages = pdfDoc.getPages();
    let processedPages = 0;
    let totalRectangles = 0;

    for (const page of pages) {
        const { height } = page.getSize();

        // Preset cover areas, including the LTD area
        const presetAreas = [
            { x: 50, y: height - 200, width: 60, height: 10 }, // AGN
            { x: 50, y: height - 220, width: 100, height: 10 }, // UCLINK LOGISITICS
            { x: 155, y: height - 220, width: 30, height: 10 } // LTD
        ];

        for (const area of presetAreas) {
            page.drawRectangle({
                x: area.x,
                y: area.y,
                width: area.width,
                height: area.height,
                color: PDFLib.rgb(1, 1, 1)
            });
            totalRectangles += 1;
        }

        processedPages += 1;
        resultDiv.innerHTML = `<div class="progress">🛡️ 回退策略:处理第 ${processedPages} 页,共 ${pages.length} 页...</div>`;
    }

    await saveAndDownload(processedPages, totalRectangles, 'fallback_processed');
}
/**
 * Serialise the edited pdf-lib document, trigger its download and show the
 * summary panel.
 *
 * The download is named after the uploaded file (`<name>_<suffix>.pdf`); the
 * previous fixed name is used only when no uploaded file can be found.
 *
 * @param {number} processedPages - Number of pages that were modified.
 * @param {number} totalRectangles - Number of cover rectangles drawn.
 * @param {string} suffix - Appended to the base file name.
 * @param {Array} [recognizedTexts] - Words recognised by OCR, used for the
 *     page-number count. When omitted, a global `allRecognizedTexts` is used
 *     if one exists, otherwise the count is 0 (e.g. for the fallback strategy).
 */
async function saveAndDownload(processedPages, totalRectangles, suffix, recognizedTexts) {
    const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => ({
        '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }[ch]));

    // Derive the output name from the uploaded file.
    const sourceFile = (typeof fileInput !== 'undefined' && fileInput && fileInput.files)
        ? fileInput.files[0]
        : null;
    const baseName = sourceFile && sourceFile.name
        ? sourceFile.name.replace(/\.pdf$/i, '')
        : 'TK_43610263735_20251015';
    const downloadName = `${baseName}_${suffix}.pdf`;

    const pdfBytesModified = await pdfDoc.save();
    const blob = new Blob([pdfBytesModified], { type: 'application/pdf' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = downloadName;
    a.click();
    // Give the browser a moment to start the download before freeing the URL
    setTimeout(() => URL.revokeObjectURL(url), 1000);

    const precisionModes = ['超精确模式(±0像素)', '精确模式(±1像素)', '平衡模式(±2像素)', '安全模式(±3像素)', '宽松模式(±4像素)', '最安全模式(±5像素)'];

    // The recognised words live inside the OCR routine; reading an undeclared
    // identifier here would throw after the download has already started, so
    // the list is taken from the argument and a global is only probed safely.
    let recognized = [];
    if (Array.isArray(recognizedTexts)) {
        recognized = recognizedTexts;
    } else if (typeof allRecognizedTexts !== 'undefined' && Array.isArray(allRecognizedTexts)) {
        recognized = allRecognizedTexts;
    }

    // Page-number statistics
    const pageNumbers = recognized.filter(word => {
        const text = word.text.trim();
        return text.match(/^PAGE\s+\d+\s+OF\s+\d+$/i) || text.match(/^\d+\s*\/\s*\d+$/);
    });

    resultDiv.innerHTML = `
<div class="success">
✅ PDF OCR处理完成!<br>
📄 已处理 ${processedPages} 页<br>
🔲 添加了 ${totalRectangles} 个覆盖矩形<br>
💾 文件已下载:${escapeHtml(downloadName)}<br>
🎯 使用策略:${getStrategyName(selectedStrategy)}<br>
🔧 坐标转换:缩放转换方法(已验证)<br>
📏 删除精度:${precisionModes[deletePrecision]}<br>
📄 检测到 ${pageNumbers.length} 个页码,已自动排除<br>
<strong>✅ 目标文字已被精确删除!</strong>
</div>
`;
}
/**
 * Set the delete precision (how many units each cover rectangle is grown on
 * every side) and mirror it in the slider and its label.
 *
 * The value is coerced to an integer and clamped to the range of available
 * modes. An out-of-range or non-numeric value would otherwise show
 * "undefined" as the label and feed an invalid number into the rectangle
 * arithmetic.
 *
 * @param {number|string} value - Requested precision, 0 (exact) to 5.
 */
function setPrecision(value) {
    const modes = ['超精确模式(±0像素)', '精确模式(±1像素)', '平衡模式(±2像素)', '安全模式(±3像素)', '宽松模式(±4像素)', '最安全模式(±5像素)'];

    const parsed = parseInt(value, 10);
    const level = Number.isNaN(parsed)
        ? 0
        : Math.min(modes.length - 1, Math.max(0, parsed));

    deletePrecision = level;

    const precisionSlider = document.getElementById('precisionSlider');
    const precisionText = document.getElementById('precisionText');
    if (precisionSlider) {
        precisionSlider.value = level;
    }
    if (precisionText) {
        precisionText.textContent = modes[level];
    }
}
/**
 * Return the page to its initial state: no file, no results, the OCR
 * strategy selected (first strategy card highlighted) and exact precision.
 */
function resetForm() {
    // Input and result areas
    fileInput.value = '';
    document.getElementById('fileInfo').style.display = 'none';
    processBtn.disabled = true;
    resultDiv.innerHTML = '';
    ocrResultsDiv.innerHTML = '';

    // Loaded documents and options
    pdfDoc = null;
    pdfjsDocument = null;
    selectedStrategy = 'ocr';
    deletePrecision = 0; // back to exact mode

    // Highlight only the first strategy card
    for (const card of document.querySelectorAll('.strategy-card')) {
        card.classList.remove('selected');
    }
    const firstCard = document.querySelector('.strategy-card');
    firstCard.classList.add('selected');

    // Reset the precision controls
    setPrecision(0);
}
</script>
</body>
</html>
......@@ -13,6 +13,7 @@
<field optional="show" name="customs_clearance_status" string="Customs Clearance Status"/>
<field optional="show" name="bl_no" string="Bill of Loading No."/>
<field optional="show" name="bl_date" string="B/L Date"/>
<field optional="show" name="last_mile_provider_ids" string="Last Mile Providers" widget="many2many_tags"/>
<field optional="hide" name="transfer_bl_no" string="Transfer Bill of Loading No."/>
<field optional="show" name="customer_id" string="Customer"/>
<field optional="show" name="customs_bl_no" string="Customs Bill of Loading No."/>
......@@ -140,6 +141,7 @@
<group>
<group>
<field name="bl_date" string="B/L Date"/>
<field name="last_mile_provider_ids" string="Last Mile Providers" widget="many2many_tags"/>
<field name="transfer_bl_no"/>
<field name="customer_id" string="Customer"/>
<field name="customs_bl_no" string="Customs Bill of Loading No."/>
......@@ -308,6 +310,8 @@
domain="[('cc_deadline', '&lt;', time.strftime('%Y-%m-%d'))]"/>
<separator/>
<group expand="0" string="Group By">
<!-- <filter domain="[]" name="groupby_last_mile_provider_ids" string="Last Mile Providers"
context="{'group_by': 'last_mile_provider_ids'}"/> -->
<filter domain="[]" name="groupby_customer_id" string="Customer"
context="{'group_by': 'customer_id'}"/>
<filter domain="[]" name="groupby_cc_company_id" string="CC Company"
......@@ -465,4 +469,18 @@
</field>
</record>
<!-- Server action bound to cc.bl list and form views: calls action_batch_get_pod_info() on the selected records (fetch last-mile POD) -->
<record id="bl_get_pod_info_server_action" model="ir.actions.server">
<field name="name">Batch Get POD Info</field>
<field name="model_id" ref="model_cc_bl"/>
<field name="binding_model_id" ref="model_cc_bl"/>
<field name="state">code</field>
<field name="binding_view_types">list,form</field>
<field name="groups_id" eval="[(4, ref('ccs_base.group_clearance_of_customs_user'))]"/>
<field name="code">
if records:
action = records.action_batch_get_pod_info()
</field>
</record>
</odoo>
\ No newline at end of file
......@@ -7,4 +7,5 @@ from . import add_exception_info_wizard
from . import update_bl_status_wizard
from . import batch_update_transfer_bl_no_wizard
from . import bl_done_wizard
from . import batch_get_pod_info_wizard
# -*- coding: utf-8 -*-
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
import base64
import io
import logging
import requests
from odoo import models, fields, _
from odoo.exceptions import ValidationError
_logger = logging.getLogger(__name__)
class BatchGetPodInfoWizard(models.TransientModel):
_name = 'batch.get.pod.info.wizard'
_description = 'Batch Get POD Info Wizard' # 批量获取POD信息向导
def get_order(self):
    """
    Return the ``cc.bl`` records the wizard was opened for.

    The ids are read from the ``active_id`` context key, which may hold a
    single id or a list of ids. When that key is absent the ``active_ids``
    key is used instead, so a missing ``active_id`` yields the selected
    records (or an empty recordset) rather than a record with id ``None``.

    :return: ``cc.bl`` recordset
    """
    context = self._context
    order_ids = context.get('active_id')
    if order_ids is None or order_ids is False:
        order_ids = context.get('active_ids') or []
    if not isinstance(order_ids, (list, tuple)):
        order_ids = [order_ids]
    return self.env['cc.bl'].browse(list(order_ids))
# Wizard option: when set, confirm() calls _sync_last_mile_pod() after the files have been written.
sync_last_mile_pod = fields.Boolean(
string='Sync Last Mile POD',
default=True,
help='Whether to sync last mile POD information'
)
# Wizard option: when set, confirm() passes the matched files through _remove_specified_text() before writing them.
remove_specified_text = fields.Boolean(
string='Remove Specified Text',
default=True,
help='Whether to remove specified text from PDF files'
)
# Disabled option (kept for reference): would expose the debug markers to the user.
# debug_mode = fields.Boolean(
# string='Debug Mode',
# default=False,
# help='Show red markers for deleted text positions'
# )
def confirm(self):
    """
    Run the wizard for the selected bills of lading.

    Steps: fetch the PDFs from the API, pair them with the selected bills,
    refuse to continue if any bill has no PDF, optionally cover the
    specified text, store each PDF as a clearance file and optionally
    trigger the sync of every stored file.

    :raises ValidationError: when at least one selected bill has no PDF
    """
    selected_bills = self.get_order()
    # Fetch the release-note PDFs and pair them with the selected bills
    fetched_pdfs = self._get_pdf_file_arr()
    processed_files = self._match_bl_by_file_name(fetched_pdfs)

    # Every selected bill must have received a file
    matched_ids = {entry['bl'].id for entry in processed_files if entry.get('bl')}
    unmatched = [bill for bill in selected_bills if bill.id not in matched_ids]
    if unmatched:
        raise ValidationError(_('%s bill of loading cannot find release note file') % (
            ', '.join([bill.bl_no for bill in unmatched])))

    if self.remove_specified_text:
        # debug_mode stays off here; pass True to inspect the covered areas
        processed_files = self._remove_specified_text(processed_files, debug_mode=False)

    if not processed_files:
        return

    # Store the PDFs as clearance files first, then sync them
    self._write_pdf_file(processed_files)
    if self.sync_last_mile_pod:
        self._sync_last_mile_pod(processed_files)
def _get_pdf_file_arr(self):
    """
    Fetch the release-note PDFs of the selected bills of lading from the API.

    The normalised bill numbers are POSTed to ``<api_url>/api/release-notes/pdfs``.
    The base URL comes from the ``last_mile_pod_api_url`` system parameter and
    the request timeout (seconds, default 120) from ``last_mile_pod_api_timeout``.
    Result items that are unsuccessful, incomplete, not valid base64 or do not
    start with a PDF header are skipped with a warning.

    :return: list of ``{'bl_no': ..., 'file_name': ..., 'file_data': <base64>}``
    :raises ValidationError: when the request fails, the API reports an error
        or the response contains no results
    """
    default_timeout = 120.0

    # Normalised bill numbers of the selected records
    common = self.env['common.common'].sudo()
    bill_numbers = [common.process_match_str(bl.bl_no) for bl in self.get_order()]

    params = self.env['ir.config_parameter'].sudo()
    api_url = params.get_param('last_mile_pod_api_url', 'http://172.104.52.150:7002')
    if not api_url:
        raise ValidationError(_('API URL not configured'))
    try:
        timeout = float(params.get_param('last_mile_pod_api_timeout', default_timeout))
    except (TypeError, ValueError):
        timeout = default_timeout

    try:
        # Without a timeout an unresponsive API would block the worker
        # handling this request indefinitely.
        response = requests.post(
            f"{api_url.rstrip('/')}/api/release-notes/pdfs",
            headers={'Content-Type': 'application/json'},
            json={"bill_numbers": bill_numbers},
            timeout=timeout,
        )
    except requests.exceptions.RequestException as e:
        raise ValidationError(_('API request failed: %s') % str(e))

    if response.status_code != 200:
        raise ValidationError(_('Failed to get PDF file from API: %s') % response.text)

    try:
        result = response.json()
    except ValueError as e:
        # A 200 response whose body is not JSON (e.g. a proxy error page)
        raise ValidationError(_('API request failed: %s') % str(e))

    # Check the response structure
    if not result or not isinstance(result, dict):
        raise ValidationError(_('API returned empty response'))
    if not result.get('success'):
        error_msg = result.get('message', 'Unknown error')
        raise ValidationError(_('API returned error: %s') % error_msg)
    results = result.get('results', [])
    if not results:
        # The call succeeded but returned no PDF at all
        raise ValidationError(_('No PDF files found in API response'))

    pdf_file_arr = []
    for result_item in results:
        if not isinstance(result_item, dict) or not result_item.get('success'):
            continue
        bill_number = result_item.get('bill_number')
        filename = result_item.get('filename')
        base64_data = result_item.get('base64')
        if not all([bill_number, filename, base64_data]):
            # Log the identifying fields only: the item may carry a whole
            # base64-encoded PDF, which must not end up in the log.
            _logger.warning("跳过无效的PDF文件项: bill_number=%s, filename=%s", bill_number, filename)
            continue
        try:
            pdf_binary = base64.b64decode(base64_data)
        except (ValueError, TypeError) as e:
            _logger.warning("API PDF文件验证失败,提单号: %s, 错误: %s", bill_number, e)
            continue
        # A PDF file starts with the "%PDF-" header
        if not pdf_binary.startswith(b'%PDF-'):
            _logger.warning("API返回的文件不是有效的PDF格式,提单号: %s", bill_number)
            continue
        pdf_file_arr.append({
            'bl_no': bill_number,
            'file_name': filename,
            'file_data': base64_data
        })
    return pdf_file_arr
def _write_pdf_file(self, processed_files):
"""
Write PDF file to clearance files # 回写PDF文件到清关文件
:param processed_files: 处理后的文件数组
"""
for file_info in processed_files:
if not file_info['bl']:
continue
bl = file_info['bl']
file_name = file_info['file_name']
file_data = file_info['file_data']
# 创建新的清关文件记录
clearance_file = self.env['cc.clearance.file'].create({
'bl_id': bl.id,
'file_name': '尾程交接POD(待大包数量和箱号)',
'attachment_name': file_name,
'file': file_data
})
file_info['clearance_file'] = clearance_file
def _match_bl_by_file_name(self, pdf_file_arr):
"""
Match BL by file name and return processed array # 根据文件名匹配提单并返回处理后的数组
:param pdf_file_arr: PDF文件数组 [{'bill_number':'', 'filename':'', 'file_data':''}]
:return: 处理后的数组 [{'bl': bl_obj, 'file_name': 'xxx.pdf', 'file_data': 'xxx', 'matched': True/False}]
"""
bl_obj = self.get_order() # 获取当前选中的提单对象
processed_files = []
for bl in bl_obj:
select_bl_no = self.env['common.common'].sudo().process_match_str(bl.bl_no)
for pdf_file in pdf_file_arr:
# 尝试不同的字段名(API可能使用不同的字段名)
file_name = pdf_file.get('file_name') # 获取文件名
file_data = pdf_file.get('file_data') # 获取文件数据
bl_no = pdf_file.get('bl_no') # 获取提单号
if bl_no and select_bl_no == bl_no:
# 构建处理后的文件信息
processed_file = {
'bl': bl,
'file_name': file_name,
'file_data': file_data,
'bl_no': bl_no,
}
processed_files.append(processed_file)
break
return processed_files
def _sync_last_mile_pod(self, processed_files):
    """
    Call ``action_sync()`` on the clearance file of every processed entry.

    Entries without a bill, or without a ``'clearance_file'`` record, are
    skipped.

    :param processed_files: entries as filled in by ``_write_pdf_file``
    """
    for entry in processed_files:
        bill = entry['bl']
        if not bill:
            continue
        record = entry.get('clearance_file')
        if not record:
            continue
        record.action_sync()
        _logger.info(f"Successfully synced POD for BL {bill.bl_no}")
def _remove_specified_text(self, processed_files, debug_mode=False):
"""
Remove specified text from PDF files using OCR recognition # 使用OCR识别涂抹指定文字
:param processed_files: 处理后的文件数组
:param debug_mode: 是否显示调试标记
:return: 处理后的文件数组(包含处理后的PDF数据)
"""
updated_files = []
for file_info in processed_files:
if not file_info['bl']:
updated_files.append(file_info)
continue
bl = file_info['bl']
file_data = file_info['file_data']
processed_file_data = file_data # 默认使用原始数据
# 使用OCR识别和删除指定文字
if file_data:
# 将base64数据转换为二进制
pdf_binary = base64.b64decode(file_data)
# 使用OCR方法处理PDF
processed_pdf = self._process_pdf_with_ocr(
pdf_data=pdf_binary,
bl_no=bl.bl_no,
debug_mode=debug_mode
)
if processed_pdf:
# 将处理后的PDF转换回base64
processed_file_data = base64.b64encode(processed_pdf).decode('utf-8')
# 更新文件信息,使用处理后的PDF数据
updated_file_info = file_info.copy()
updated_file_info['file_data'] = processed_file_data
updated_files.append(updated_file_info)
return updated_files
def _process_pdf_with_ocr(self, pdf_data, bl_no, debug_mode=False):
    """
    Cover the target words of a PDF with white rectangles, locating them by OCR.

    Every page is rendered at 2x scale and recognised with Tesseract; the
    words selected by ``_find_target_texts`` are then painted over. A page
    whose OCR fails is logged and left unchanged.

    NOTE(review): drawing a rectangle only paints over the page. If the PDF
    contains real (non-scanned) text, that text presumably remains in the
    file underneath the rectangle; confirm whether true redaction is required.

    :param pdf_data: raw PDF bytes
    :param bl_no: bill of lading number, used in log messages only
    :param debug_mode: when True, outline every covered area in red
    :return: bytes of the modified PDF, or False when saving failed
    """
    import fitz  # PyMuPDF
    from PIL import Image
    import pytesseract

    # Point pytesseract at the tesseract executable
    self._setup_tesseract_path()

    # OCR configuration (the same options the HTML tool passes to Tesseract)
    config = '--psm 6 --oem 1 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,- -c preserve_interword_spaces=1 -c tessedit_do_invert=0 -c textord_min_linesize=1.0 -c classify_bln_numeric_mode=0 -c textord_force_make_prop_words=F -c textord_min_xheight=8 -c textord_tabfind_show_vlines=0'
    white = (1, 1, 1)
    total_rectangles = 0
    processed_pages = 0
    detected_texts = []

    pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
    # try/finally: the document is closed on every path, including
    # exceptions raised while rendering or matching.
    try:
        for page_num in range(len(pdf_document)):
            page = pdf_document[page_num]
            _logger.info(f"正在OCR识别第{page_num + 1}页")

            # Render the page at 2x scale and hand it to PIL. The PNG bytes
            # are decoded by PIL directly; a detour through OpenCV yields the
            # same RGB image, so that optional dependency is not needed.
            pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
            pil_img = Image.open(io.BytesIO(pix.tobytes("png")))
            if pil_img.mode != 'RGB':
                pil_img = pil_img.convert('RGB')

            try:
                ocr_data = pytesseract.image_to_data(
                    pil_img,
                    output_type=pytesseract.Output.DICT,
                    lang='eng',
                    config=config
                )
            except Exception as e:
                _logger.error(f"OCR识别失败: {str(e)}")
                continue

            # Page size in PDF units and image size in pixels, for the
            # coordinate conversion done by _find_target_texts
            page_width = page.rect.width
            page_height = page.rect.height
            viewport_width = pil_img.width
            viewport_height = pil_img.height

            # Collect the non-empty words together with their pixel boxes
            page_recognized_texts = []
            for i, raw_text in enumerate(ocr_data['text']):
                text = raw_text.strip()
                if not text:
                    continue
                left = ocr_data['left'][i]
                top = ocr_data['top'][i]
                page_recognized_texts.append({
                    'text': text,
                    'confidence': ocr_data['conf'][i],
                    'bbox': {
                        'x0': left,
                        'y0': top,
                        'x1': left + ocr_data['width'][i],
                        'y1': top + ocr_data['height'][i]
                    },
                    'page': page_num
                })
            _logger.info(f"第{page_num + 1}页识别到的所有文字: {[word['text'] for word in page_recognized_texts]}")

            # Pick the words that have to be covered
            page_texts = self._find_target_texts(
                page_recognized_texts,
                page_num,
                viewport_width,
                viewport_height,
                page_width,
                page_height
            )
            detected_texts.extend(page_texts)
            if page_texts:
                _logger.info(f"第{page_num + 1}页检测到的目标文字: {[text['text'] for text in page_texts]}")
            else:
                _logger.info(f"第{page_num + 1}页未检测到目标文字")

            for text_info in page_texts:
                rect = {
                    'x': text_info['x'],
                    'y': text_info['y'],
                    'width': text_info['width'],
                    'height': text_info['height']
                }
                try:
                    area = fitz.Rect(rect['x'], rect['y'], rect['x'] + rect['width'], rect['y'] + rect['height'])
                    # White rectangle over the word
                    page.draw_rect(area, color=white, fill=white)
                    if debug_mode:
                        # The red outline is drawn after the white cover;
                        # drawn first, it would be painted over and never
                        # be visible.
                        page.draw_rect(area, color=(1, 0, 0))
                    _logger.info(
                        f"删除目标文字: {text_info['text']} 位置: x={rect['x']:.1f}, y={rect['y']:.1f}, w={rect['width']:.1f}, h={rect['height']:.1f}")
                    total_rectangles += 1
                except Exception as e:
                    _logger.error(f"删除失败: {str(e)}")
            processed_pages += 1

        # Serialise the modified document
        try:
            with io.BytesIO() as output_buffer:
                pdf_document.save(output_buffer, garbage=4, deflate=True, clean=True)
                result_data = output_buffer.getvalue()
        except Exception as e:
            _logger.error(f"PDF保存失败: {str(e)}")
            return False
    finally:
        pdf_document.close()

    # Summary
    _logger.info(
        f"PDF处理完成 - 提单号: {bl_no}, 处理页数: {processed_pages}, 删除矩形数: {total_rectangles}, 检测到文字数: {len(detected_texts)}")
    if detected_texts:
        _logger.info(f"检测到的目标文字: {[text['text'] for text in detected_texts]}")
    return result_data
def _setup_tesseract_path(self):
    """
    Point pytesseract at a tesseract executable, then check the language data.

    On Windows the usual installation folders are probed first and ``PATH``
    second; elsewhere ``PATH`` is consulted first and well-known locations
    second. When nothing is found pytesseract's current command is kept.

    Candidates may contain environment variables and ``*`` wildcards; both
    are expanded before the existence test (a literal ``%USERNAME%`` or
    ``*`` in the path never exists on disk).
    """
    import glob
    import os
    import shutil
    import pytesseract

    on_windows = os.name == 'nt'
    if on_windows:
        candidates = [
            r'C:\Program Files\Tesseract-OCR\tesseract.exe',
            r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe',
            r'C:\Users\%USERNAME%\AppData\Local\Tesseract-OCR\tesseract.exe'
        ]
    else:
        candidates = [
            '/usr/bin/tesseract',
            '/usr/local/bin/tesseract',
            '/opt/homebrew/bin/tesseract',  # macOS, Apple silicon Homebrew
            '/usr/local/Cellar/tesseract/*/bin/tesseract'  # macOS Homebrew, one folder per version
        ]

    resolved = None if on_windows else shutil.which('tesseract')
    if not resolved:
        for pattern in candidates:
            matches = sorted(glob.glob(os.path.expandvars(pattern)))
            if matches:
                # With several versioned folders take the last one in
                # lexicographic order (not a true version comparison).
                resolved = matches[-1]
                break
    if not resolved and on_windows:
        resolved = shutil.which('tesseract')

    if resolved:
        pytesseract.pytesseract.tesseract_cmd = resolved

    # Check the language data files
    self._check_tessdata_files()
def _check_tessdata_files(self):
    """
    Look for the English Tesseract language file (``eng.traineddata``).

    The directories probed are: ``TESSDATA_PREFIX`` (the directory itself and
    its ``tessdata`` sub-folder), a ``tessdata`` folder next to the configured
    tesseract executable, and a few common installation locations. A warning
    is logged when the file is found in none of them; since the list cannot
    be exhaustive the warning is a hint, not proof that OCR will fail.

    :return: True when the file was found, False otherwise
    """
    import glob
    import os
    import pytesseract

    candidates = []

    prefix = os.environ.get('TESSDATA_PREFIX')
    if prefix:
        candidates.extend([prefix, os.path.join(prefix, 'tessdata')])

    # Only meaningful when the command is a path: for a bare "tesseract" the
    # directory part is empty and must not be turned into "/tessdata".
    command_dir = os.path.dirname(pytesseract.pytesseract.tesseract_cmd)
    if command_dir:
        candidates.append(os.path.join(command_dir, 'tessdata'))

    for pattern in (
        '/usr/share/tesseract-ocr/tessdata',
        '/usr/share/tesseract-ocr/*/tessdata',
        '/usr/local/share/tesseract-ocr/tessdata',
        '/usr/share/tessdata',
        '/usr/local/share/tessdata',
        '/opt/homebrew/share/tessdata',  # macOS, Apple silicon Homebrew
        '/usr/local/Cellar/tesseract/*/share/tessdata',  # macOS Homebrew
    ):
        candidates.extend(sorted(glob.glob(pattern)))

    for tessdata_dir in candidates:
        eng_data = os.path.join(tessdata_dir, 'eng.traineddata')
        if os.path.exists(eng_data):
            _logger.debug("Tesseract language data found: %s", eng_data)
            return True

    _logger.warning(
        "eng.traineddata was not found in any known tessdata directory; "
        "OCR may fail if the English language data is not installed")
    return False
def _find_target_texts(self, words, page_num, viewport_width, viewport_height, page_width, page_height):
"""
Find target texts using OCR results (完全按照HTML逻辑) # 使用OCR结果查找目标文字
"""
# 定义目标文字和排除文字(与HTML文件完全一致)
TARGET_TEXTS = ['AGN', 'UCLINK LOGISITICS LTD', 'UCLINK LOGISITICS', 'UCLINK', 'LOGISITICS', 'LOGISTICS', 'LTD',
'UCLINKLOGISITICSLTD']
EXCLUDE_TEXTS = ['AIR EQK', 'ARN', 'EQK', 'AIR', 'Page 1 of 1', 'Page 2 of 2', 'Page 3 of 3', 'Page 4 of 4',
'Page 5 of 5']
found_texts = []
for word in words:
text = word['text'].strip().upper()
# 首先检查是否在排除列表中(与HTML完全一致)
is_excluded = False
for exclude_text in EXCLUDE_TEXTS:
exclude_upper = exclude_text.upper()
if exclude_upper in text or text in exclude_upper:
is_excluded = True
break
# 检查页码模式(Page X of Y)(与HTML完全一致)
import re
if not is_excluded and (re.match(r'^PAGE\s+\d+\s+OF\s+\d+$', text) or re.match(r'^\d+\s*/\s*\d+$', text)):
is_excluded = True
if is_excluded:
# _logger.info(f"排除文字: {word['text']}")
continue
# 检查目标文字匹配(与HTML完全一致)
for target_text in TARGET_TEXTS:
target_upper = target_text.upper()
is_match = False
if target_text == 'AGN':
# AGN使用精确匹配
is_match = text == 'AGN'
elif target_text == 'LTD':
# LTD使用精确匹配
is_match = text == 'LTD'
else:
# 其他文字使用包含匹配,但更严格(与HTML完全一致)
is_match = target_upper in text and \
'AIR' not in text and \
'EQK' not in text and \
'ARN' not in text
# 如果精确匹配失败,尝试模糊匹配(与HTML完全一致)
if not is_match and target_text != 'AGN' and target_text != 'LTD':
is_match = self._fuzzy_match(text, target_upper)
if is_match:
# 坐标转换(适配PyMuPDF坐标系统)
scale_x = page_width / viewport_width
scale_y = page_height / viewport_height
# PyMuPDF使用左下角为原点,OCR使用左上角为原点
# 简化Y坐标转换:直接使用OCR的Y坐标,但调整到正确位置
converted_x = word['bbox']['x0'] * scale_x
converted_y = (word['bbox']['y0'] * scale_y) # 直接使用OCR的Y坐标
converted_width = (word['bbox']['x1'] - word['bbox']['x0']) * scale_x
converted_height = (word['bbox']['y1'] - word['bbox']['y0']) * scale_y
# 增加宽度和高度,确保完全覆盖文字
# 针对长文字使用更大的边距
if target_text in ['UCLINK LOGISITICS LTD', 'UCLINK LOGISITICS', 'UCLINKLOGISITICSLTD']:
# 长文字使用更大的边距
width_margin = max(20, converted_width * 0.3) # 至少20像素或30%的宽度边距
height_margin = max(5, converted_height * 0.3) # 至少5像素或30%的高度边距
else:
# 短文字使用标准边距
width_margin = max(10, converted_width * 0.2) # 至少10像素或20%的宽度边距
height_margin = max(3, converted_height * 0.2) # 至少3像素或20%的高度边距
# 记录原始尺寸
original_x = converted_x
original_y = converted_y
original_width = converted_width
original_height = converted_height
converted_x = max(0, converted_x - width_margin / 2)
converted_y = max(0, converted_y - height_margin / 2)
converted_width = min(page_width - converted_x, converted_width + width_margin)
converted_height = min(page_height - converted_y, converted_height + height_margin)
# 调试:显示边距计算过程
_logger.info(
f"文字 '{target_text}' 边距计算: 原始尺寸({original_width:.1f}x{original_height:.1f}) -> 边距({width_margin:.1f}x{height_margin:.1f}) -> 最终尺寸({converted_width:.1f}x{converted_height:.1f})")
found_texts.append({
'text': target_text,
'full_text': word['text'],
'page': page_num,
'x': converted_x,
'y': converted_y,
'width': converted_width,
'height': converted_height,
'confidence': word['confidence'] / 100,
'type': 'agn' if target_text == 'AGN' else 'uclink'
})
break
return found_texts
def _fuzzy_match(self, str1, str2):
"""
Fuzzy match function (与HTML完全一致) # 模糊匹配函数
"""
import re
s1 = re.sub(r'[^A-Z]', '', str1)
s2 = re.sub(r'[^A-Z]', '', str2)
if len(s1) == 0 or len(s2) == 0:
return False
# 计算编辑距离
distance = self._levenshtein_distance(s1, s2)
max_len = max(len(s1), len(s2))
# 如果编辑距离小于等于最大长度的1/3,认为匹配
return distance <= max_len / 3
def _levenshtein_distance(self, s1, s2):
"""
Calculate Levenshtein distance (与HTML完全一致) # 计算编辑距离
"""
if len(s1) < len(s2):
return self._levenshtein_distance(s2, s1)
if len(s2) == 0:
return len(s1)
previous_row = list(range(len(s2) + 1))
for i, c1 in enumerate(s1):
current_row = [i + 1]
for j, c2 in enumerate(s2):
insertions = previous_row[j + 1] + 1
deletions = current_row[j] + 1
substitutions = previous_row[j] + (c1 != c2)
current_row.append(min(insertions, deletions, substitutions))
previous_row = current_row
return previous_row[-1]
def _save_and_return_download_link(self, file_info):
    """
    Save a processed PDF as an attachment and return a download action
    (used for testing).

    :param file_info: dict read for ``file_data`` (raw PDF bytes, or the PDF
        base64-encoded as str or bytes) and ``file_name``
    :return: ``ir.actions.act_url`` action dict pointing at the new attachment
    :raises ValidationError: when no data is given, the data is not a PDF
        that can be opened, or the attachment cannot be created
    """
    file_data = file_info.get('file_data', '')
    file_name = file_info.get('file_name', 'processed.pdf')
    if not file_data:
        raise ValidationError(_('No processed file data available'))

    try:
        if isinstance(file_data, (bytes, bytearray)) and bytes(file_data[:5]) == b'%PDF-':
            # Already raw PDF bytes.
            pdf_binary = bytes(file_data)
        else:
            # Anything else is treated as base64, whether it arrives as str
            # or as bytes. Decoding once is enough: a second attempt on the
            # same input cannot give a different result.
            try:
                pdf_binary = base64.b64decode(file_data)
            except (ValueError, TypeError) as e:
                _logger.error("base64 decoding of the PDF data failed: %s", e)
                raise ValidationError(_('Invalid PDF data for saving: not a valid PDF format')) from e
            if not pdf_binary.startswith(b'%PDF-'):
                _logger.error("decoded data does not start with the PDF header")
                raise ValidationError(_('Invalid PDF data for saving: not a valid PDF format'))

        # Make sure the PDF can actually be opened before storing it.
        try:
            import fitz
            test_doc = fitz.open(stream=pdf_binary, filetype="pdf")
            try:
                _logger.info(f"PDF验证成功,页数: {len(test_doc)}")
            finally:
                test_doc.close()
        except Exception as e:
            _logger.error(f"PDF验证失败: {str(e)}")
            raise ValidationError(_('Invalid PDF data for saving: cannot open PDF - %s') % str(e)) from e

        # Create the attachment record
        attachment = self.env['ir.attachment'].create({
            'name': f'processed_{file_name}',
            'type': 'binary',
            'datas': base64.b64encode(pdf_binary),
            'mimetype': 'application/pdf',
            'res_model': 'batch.get.pod.info.wizard',
            'res_id': self.id,
        })
        # Return the download action
        return {
            'type': 'ir.actions.act_url',
            'url': f'/web/content/{attachment.id}?download=true',
            'target': 'new',
        }
    except ValidationError:
        # Our own, already user-readable errors must not be wrapped a second
        # time in the generic "Failed to save" message.
        raise
    except Exception as e:
        _logger.error(f"保存PDF附件失败: {str(e)}")
        raise ValidationError(_('Failed to save PDF attachment: %s') % str(e)) from e
<?xml version="1.0" encoding="utf-8"?>
<odoo>
<data>
<!-- Form view of the Batch Get POD Info wizard (model batch.get.pod.info.wizard) -->
<record id="view_batch_get_pod_info_wizard_form" model="ir.ui.view">
<field name="name">batch.get.pod.info.wizard.form</field>
<field name="model">batch.get.pod.info.wizard</field>
<field name="arch" type="xml">
<form string="Batch Get POD Info"> <!-- Wizard title: batch get POD info -->
<sheet>
<group>
<group>
<field name="sync_last_mile_pod" widget="boolean_toggle"/>
</group>
<group>
<field name="remove_specified_text" widget="boolean_toggle"/>
</group>
</group>
<div class="alert alert-info" role="alert">
<strong>Description:</strong> <!-- Help text explaining the two toggles above -->
<ul>
<li><strong>Sync Last Mile POD:</strong> Synchronize POD (Proof of Delivery) attachment information with TK system, including big package quantities and container numbers</li> <!-- Sync Last Mile POD: sync the last-mile handover POD attachment info (with big package quantity and box number) to TK -->
<li><strong>Remove Specified Text:</strong> Remove specified text (AGN, UCLINK LOGISITICS LTD) from PDF files</li> <!-- Remove Specified Text: smear over the specified text in the PDF files -->
</ul>
</div>
<footer>
<button string="Confirm" type="object" name="confirm" class="btn-primary"/>
<button string="Close" special="cancel"/>
</footer>
</sheet>
</form>
</field>
</record>
<!-- Window action that opens the Batch Get POD Info wizard in a dialog (target "new") -->
<record id="action_batch_get_pod_info_wizard" model="ir.actions.act_window">
<field name="name">Batch Get POD Info</field> <!-- Action title: batch get POD info -->
<field name="res_model">batch.get.pod.info.wizard</field>
<field name="view_mode">form</field>
<field name="target">new</field>
<field name="context">{}</field>
</record>
</data>
</odoo>
\ No newline at end of file
......@@ -2,7 +2,7 @@
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
import logging
from datetime import datetime
from odoo import models, api, fields, _
from odoo.exceptions import ValidationError
......@@ -84,6 +84,23 @@ class BatchInputShipPackageStatusWizard(models.TransientModel):
node_exception_reason_id = fields.Many2one('cc.node.exception.reason', 'Exception Reason',
domain="[('code_id', '=', update_status)]")
# 添加时间对比提示字段
time_warning = fields.Text('Time Warning', compute='_compute_time_warning', store=False)
@api.depends('process_time')
def _compute_time_warning(self):
    """
    Compute the warning shown when the process time lies too far in the past.

    The threshold in hours is read from the ``time_warning_hours`` system
    parameter (default 24). ``time_warning`` is set to an empty string when
    ``process_time`` is empty or not older than the threshold.
    """
    # Read and parse the threshold once, not once per record.
    raw_threshold = self.env['ir.config_parameter'].sudo().get_param('time_warning_hours', 24)
    try:
        threshold_hours = int(raw_threshold)
    except (TypeError, ValueError):
        # A mistyped system parameter must not break the form; fall back to
        # the default and leave a trace in the log.
        logging.getLogger(__name__).warning(
            "Invalid time_warning_hours value %r, falling back to 24", raw_threshold)
        threshold_hours = 24

    # ORM helper for "now", in the format the ORM expects for datetime fields.
    now = fields.Datetime.now()
    for record in self:
        warning = ""
        if record.process_time:
            hours_diff = (now - record.process_time).total_seconds() / 3600
            if hours_diff > threshold_hours:
                # Warning: the selected operation time is older than the
                # threshold, ask the user to confirm it again.
                warning = _("The selected operation time exceeds %s hours, please confirm the operation time again") % threshold_hours
        record.time_warning = warning
# 批量更新小包状态
def submit(self):
# 确认数据
......
......@@ -40,10 +40,16 @@
<!-- domain="[('id','in',next_code_ids),('node_type','=','package')]"-->
<field name="update_status" required="1" options="{'no_create':True,'no_open':True,}"/>
<field name="process_time" required="1" string="Process Time"/>
<field name="node_exception_reason_id" options="{'no_create':True}"/>
<field name="state_explain"/>
<field name="is_batch" invisible="1"/>
</group>
<!-- <group> -->
<!-- Hide the warning box when time_warning is unset OR empty. Domain leaves are AND-ed unless prefixed with '|', and the value can never be both False and '' at once. -->
<div attrs="{'invisible': ['|', ('time_warning', '=', False), ('time_warning', '=', '')]}" style="color: red; font-weight: bold;">
<field name="time_warning"/>
</div>
<!-- </group> -->
<group>
<field name="is_ok"/>
</group>
......
......@@ -407,6 +407,7 @@ class TTApi(http.Controller):
big_package_vals)
else:
big_package.write(big_package_vals)
# 生成cc.ship.package
package_list = big_bag.get(
'package_list') # 大包下的小包
......@@ -431,6 +432,8 @@ class TTApi(http.Controller):
'big_bag_no')
else:
res['msg'] = 'Big bag list is empty.'
#调用计算尾程服务商的方法
bl.compute_last_mile_provider_ids()
except Exception as e:
res['code'] = 5000
res['msg'] = 'system error: %s' % str(e)
......
pygtrans
PyMuPDF
opencv-python; sys_platform == "win32"
opencv-python-headless; sys_platform != "win32"
numpy
Pillow
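tesseract  # NOTE(review): confirm this PyPI package is really needed - the code imports only pytesseract, and the OCR engine itself is installed as a system package (see the notes below)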
pytesseract
# System dependency installation notes:
# Windows:
# 1. Install Tesseract OCR: download and run the installer from https://github.com/UB-Mannheim/tesseract/wiki
# 2. Install OpenCV: pip install opencv-python
# Linux:
# 1. Update the package list: sudo apt update
# 2. Install Tesseract OCR: sudo apt install tesseract-ocr
# 3. Install the English language pack: sudo apt install tesseract-ocr-eng
# 4. Install the OpenCV system dependencies: sudo apt install libopencv-dev python3-opencv
# 5. Install the OpenCV Python package: pip install opencv-python-headless
# macOS:
# 1. Install Tesseract: brew install tesseract
# 2. Install OpenCV: pip install opencv-python-headless
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论