From bea89d058027f27e1b58f4d41d68b90e4ee26ef5 Mon Sep 17 00:00:00 2001 From: AutonetSellCar Deploy Date: Sun, 1 Feb 2026 22:11:06 +0900 Subject: [PATCH] fix: Carmodoo search encoding issue - Korean car names garbled lxml was re-encoding already decoded UTF-8 HTML based on charset="euc-kr" meta tag. Fixed by removing charset meta tags and explicitly setting UTF-8 encoding in HTMLParser. Co-Authored-By: Claude Opus 4.5 --- backend/app/api/carmodoo.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/app/api/carmodoo.py b/backend/app/api/carmodoo.py index 3bd2e04..c883772 100644 --- a/backend/app/api/carmodoo.py +++ b/backend/app/api/carmodoo.py @@ -380,7 +380,14 @@ class CarmodooClient: cars = [] try: - tree = lxml_html.fromstring(html) + # HTML 내부의 charset 선언 제거 (이미 UTF-8로 디코딩됨) + html = re.sub(r'<meta[^>]*charset[^>]*>', '', html, flags=re.IGNORECASE) + html = re.sub(r'charset\s*=\s*["\']?euc-kr["\']?', 'charset="utf-8"', html, flags=re.IGNORECASE) + + # lxml에 UTF-8 인코딩임을 명시 + from lxml.html import HTMLParser + parser = HTMLParser(encoding='utf-8') + tree = lxml_html.document_fromstring(html.encode('utf-8'), parser=parser) # 각 차량 행 찾기 (tr id="trCtl_XXXXXXX") car_rows = tree.xpath('//tr[starts-with(@id, "trCtl_")]')