提交 6fe53315 authored 作者: 贺阳's avatar 贺阳

Merge branch 'develop' into feature/PDA扫码揽收

# Please enter a commit message to explain why this merge is necessary, # especially if it merges an updated upstream into a topic branch. # # Lines starting with '#' will be ignored, and an empty message aborts # the commit.
...@@ -26,6 +26,19 @@ ...@@ -26,6 +26,19 @@
<field name="value">20</field> <field name="value">20</field>
</record> </record>
<!-- System parameters for Baidu Cloud OCR: app id, API key and secret key. -->
<!-- NOTE(review): these credentials are committed in plain text. Consider
     provisioning them per environment instead of shipping them in module
     data, and rotating the values exposed here. -->
<record id="baidu_ocr_app_id" model="ir.config_parameter">
<field name="key">baidu_ocr_app_id</field>
<field name="value">118782515</field>
</record>
<record id="baidu_ocr_api_key" model="ir.config_parameter">
<field name="key">baidu_ocr_api_key</field>
<field name="value">gWnGCmjJYzaYwhph8sJEdiRJ</field>
</record>
<record id="baidu_ocr_secret_key" model="ir.config_parameter">
<field name="key">baidu_ocr_secret_key</field>
<field name="value">mjgUUgbxXK8UHcRi5MTlPrb4BWM8NrOu</field>
</record>
</data> </data>
</odoo> </odoo>
\ No newline at end of file
...@@ -12,6 +12,9 @@ import pdfplumber ...@@ -12,6 +12,9 @@ import pdfplumber
import xlrd import xlrd
from odoo import models from odoo import models
from odoo.exceptions import ValidationError from odoo.exceptions import ValidationError
import tempfile
from aip.ocr import AipOcr
from pdf2image import convert_from_path
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
import html import html
...@@ -161,12 +164,25 @@ class OrderStateChangeRule(models.Model): ...@@ -161,12 +164,25 @@ class OrderStateChangeRule(models.Model):
_logger.info(f"上传文件 {file_obj.attachment_name} 失败,已尝试 {max_retries} 次,仍然失败。") _logger.info(f"上传文件 {file_obj.attachment_name} 失败,已尝试 {max_retries} 次,仍然失败。")
break # 超过最大重试次数后跳出循环 break # 超过最大重试次数后跳出循环
def get_pdf_order_data(self, attachment_data):
    """Extract the waybill number and date string from a PDF attachment.

    The PDF is first parsed with ``self.read_pdf``. When that yields no
    waybill number, the Baidu OCR fallback
    (``self.get_pdf_waybill_content_by_baidu``) is tried instead.

    :param attachment_data: PDF content, passed unchanged to both readers.
    :return: ``(order_no, date_str)``. When the OCR fallback reports an
        error or hands back an unexpected payload, the values obtained
        from ``read_pdf`` are returned unchanged.
    """
    order_no, date_str = self.read_pdf(attachment_data)
    if order_no:
        return order_no, date_str

    _logger.info('未识别到提单号,开始调用百度OCR识别')
    error_msg, result = self.get_pdf_waybill_content_by_baidu(attachment_data)
    if error_msg:
        _logger.error('百度OCR识别错误 : %s', error_msg)
    elif isinstance(result, dict) and 'order_no' in result:
        order_no = result.get('order_no')
        date_str = result.get('date_str')
    else:
        # The OCR helper may return the raw API response (an error dict or a
        # string) together with an empty error message; indexing it blindly
        # would raise, so log it and keep the read_pdf values.
        _logger.error('百度OCR识别错误 : %s', result)
    return order_no, date_str
def fetch_mail_dlv_attachment(self, **kwargs): def fetch_mail_dlv_attachment(self, **kwargs):
attachment_arr = kwargs['attachment_arr'] attachment_arr = kwargs['attachment_arr']
for attachment_tuple in attachment_arr: for attachment_tuple in attachment_arr:
try: try:
attachment_name, attachment_data = attachment_tuple attachment_name, attachment_data = attachment_tuple
order_no, date_str = self.read_pdf(attachment_data) order_no, date_str = self.get_pdf_order_data(attachment_data)
if order_no: if order_no:
# 转换为 datetime 对象 # 转换为 datetime 对象
local_time = datetime.strptime(date_str.replace(' ', ''), '%d/%m/%Y%H:%M:%S') local_time = datetime.strptime(date_str.replace(' ', ''), '%d/%m/%Y%H:%M:%S')
...@@ -303,3 +319,70 @@ class OrderStateChangeRule(models.Model): ...@@ -303,3 +319,70 @@ class OrderStateChangeRule(models.Model):
if month in pick_date_text: if month in pick_date_text:
return int(month_abbr_arr.index(mon)) return int(month_abbr_arr.index(mon))
return 0 return 0
def convert_image_by_pdf_attachment(self, attachment):
    """Render the first page of a PDF to a temporary JPEG file.

    :param attachment: bytes-like PDF content; it is written to a temporary
        ``.pdf`` file because pdf2image is called with a file path.
    :return: ``(img_path, error_msg)``. On success ``img_path`` is the path
        of the generated JPEG and ``error_msg`` is ``''``. On failure
        ``img_path`` is ``''`` and ``error_msg`` describes the problem.
        This method does not delete the returned image file.
    """
    import os  # function-scope import: only needed for temp-file cleanup

    if not attachment:
        # Report empty input explicitly instead of returning two empty
        # strings, which the caller cannot tell apart from success.
        return '', 'PDF附件内容为空'

    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_pdf:
        temp_pdf.write(attachment)
        pdf_path = temp_pdf.name

    try:
        # Only the first page is used, so only that page is rendered.
        # A local Windows setup may additionally need a ``poppler_path``
        # argument, e.g. poppler_path=r"E:\poppler-23.08.0\Library\bin".
        images = convert_from_path(pdf_path, first_page=1, last_page=1)
        if not images:
            return '', 'PDF转换图片失败:未生成任何页面'
        img_path = f"{pdf_path}_0.jpg"
        images[0].save(img_path, 'JPEG')
        return img_path, ''
    except Exception as e:
        # pdf2image raises on corrupted files; surface that as an error
        # message so the caller can decide how to proceed.
        return '', str(e)
    finally:
        # The temp PDF was created with delete=False, so remove it here.
        try:
            os.remove(pdf_path)
        except OSError as cleanup_error:
            _logger.warning('临时PDF文件删除失败: %s', cleanup_error)
def get_pdf_waybill_content_by_baidu(self, attachment):
    """Recognise the waybill number and date in a PDF via Baidu OCR.

    The PDF is turned into an image by
    ``self.convert_image_by_pdf_attachment`` and the image bytes are sent to
    ``AipOcr.basicGeneral``. Credentials are read from the
    ``baidu_ocr_app_id`` / ``baidu_ocr_api_key`` / ``baidu_ocr_secret_key``
    system parameters.

    :param attachment: PDF content, forwarded to the image converter.
    :return: ``(error_msg, result)``. On success ``error_msg`` is ``''`` and
        ``result`` is ``{'order_no': ..., 'date_str': ...}``, where either
        value is ``None`` if no matching text was found. On failure
        ``error_msg`` is non-empty and ``result`` is ``False``.
    """
    import os  # function-scope import: only needed for temp-file cleanup

    img_path, error_msg = self.convert_image_by_pdf_attachment(attachment)
    if error_msg:
        return error_msg, False
    if not img_path:
        # The converter produced neither an image nor an error message.
        return 'PDF转换图片失败:未生成图片文件', False

    try:
        with open(img_path, 'rb') as img_file:
            image_bytes = img_file.read()
    except OSError as e:
        return str(e), False
    finally:
        # The image is only needed as bytes; drop the temporary file.
        try:
            os.remove(img_path)
        except OSError as cleanup_error:
            _logger.warning('临时图片文件删除失败: %s', cleanup_error)

    params = self.env['ir.config_parameter'].sudo()
    ocr = AipOcr(
        params.get_param('baidu_ocr_app_id'),
        params.get_param('baidu_ocr_api_key'),
        params.get_param('baidu_ocr_secret_key'),
    )
    response = ocr.basicGeneral(image_bytes)

    # Surface API-level failures through error_msg instead of returning the
    # raw response with an empty error string.
    if not isinstance(response, dict):
        return '百度OCR返回结果异常: %s' % (response,), False
    if response.get('error_msg'):
        return str(response['error_msg']), False

    recognised_lines = [
        item.get('words') or '' for item in response.get('words_result') or []
    ]

    order_no_re = re.compile(r'\d{3}-\d{8,}')  # waybill number
    date_re = re.compile(r'\d{2}/\d{2}/\d{4}\s*\d{2}:\d{2}:\d{2}')  # date + time
    order_no = None
    date_str = None
    for text in recognised_lines:
        # Scanning stops once both fields are set; until then a later match
        # overwrites an earlier one (same behaviour as before).
        if order_no and date_str:
            break
        order_match = order_no_re.search(text)
        if order_match:
            order_no = order_match.group(0)
        date_match = date_re.search(text)
        if date_match:
            date_str = date_match.group(0)

    return '', {
        'order_no': order_no,
        'date_str': date_str,
    }
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论