From 209c63e463a0c85e0d4785a98bc7ff43f64b86c8 Mon Sep 17 00:00:00 2001
From: AutonetSellCar Deploy
Date: Sun, 1 Feb 2026 22:24:00 +0900
Subject: [PATCH] fix: EUC-KR decoding failure causing garbled Korean text

The Carmodoo HTML response sometimes contains invalid EUC-KR byte
sequences (e.g., 0xA4 followed by ASCII 'F'). This caused the decoder
to fall back to Latin-1, corrupting all Korean text.

Fixed by using errors='replace' which preserves Korean text while
replacing only the invalid byte sequences with replacement characters.

Co-Authored-By: Claude Opus 4.5
---
 backend/app/api/carmodoo.py | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/backend/app/api/carmodoo.py b/backend/app/api/carmodoo.py
index c883772..f171c01 100644
--- a/backend/app/api/carmodoo.py
+++ b/backend/app/api/carmodoo.py
@@ -96,24 +96,15 @@ class CarmodooClient:
         }
 
     def _decode_response(self, content: bytes) -> str:
-        """EUC-KR 응답 디코딩"""
-        try:
-            return content.decode('euc-kr')
-        except UnicodeDecodeError:
-            try:
-                return content.decode('utf-8')
-            except UnicodeDecodeError:
-                return content.decode('latin-1')
+        """EUC-KR 응답 디코딩 - 유효하지 않은 바이트는 대체문자로 처리"""
+        # errors='replace'를 사용하여 유효하지 않은 바이트 시퀀스를 대체문자(�)로 처리
+        # 이렇게 하면 일부 특수문자가 깨지더라도 한글 텍스트는 보존됨
+        return content.decode('euc-kr', errors='replace')
 
     def _clean_xml_bytes(self, content: bytes) -> bytes:
         """XML 정리"""
-        try:
-            text = content.decode('euc-kr')
-        except UnicodeDecodeError:
-            try:
-                text = content.decode('utf-8')
-            except UnicodeDecodeError:
-                text = content.decode('latin-1')
+        # errors='replace'를 사용하여 유효하지 않은 바이트 시퀀스 처리
+        text = content.decode('euc-kr', errors='replace')
         text = re.sub(r'^[0-9a-fA-F]+\r?\n', '', text, flags=re.MULTILINE)
         text = text.strip()
 