fix: Carmodoo search encoding issue - Korean car names garbled

lxml was re-decoding the already-decoded UTF-8 HTML according to its
charset="euc-kr" meta tag, which garbled the Korean text. Fixed by removing
the charset meta tags and explicitly setting UTF-8 encoding on the HTMLParser.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
AutonetSellCar Deploy
2026-02-01 22:11:06 +09:00
parent ed48cac820
commit bea89d0580

View File

@@ -380,7 +380,14 @@ class CarmodooClient:
cars = []
try:
tree = lxml_html.fromstring(html)
# HTML 내부의 charset 선언 제거 (이미 UTF-8로 디코딩됨)
html = re.sub(r'<meta[^>]*charset[^>]*>', '', html, flags=re.IGNORECASE)
html = re.sub(r'charset\s*=\s*["\']?euc-kr["\']?', 'charset="utf-8"', html, flags=re.IGNORECASE)
# lxml에 UTF-8 인코딩임을 명시
from lxml.html import HTMLParser
parser = HTMLParser(encoding='utf-8')
tree = lxml_html.document_fromstring(html.encode('utf-8'), parser=parser)
# 각 차량 행 찾기 (tr id="trCtl_XXXXXXX")
car_rows = tree.xpath('//tr[starts-with(@id, "trCtl_")]')