From c47c4817315bc02bba0218afa5d3c4209dd38ec8 Mon Sep 17 00:00:00 2001
From: Jakob Moser
Date: Sun, 1 Jun 2025 12:53:06 +0200
Subject: [PATCH] Try more encodings when parsing song file

---
 karaokatalog/parse_song_txt.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/karaokatalog/parse_song_txt.py b/karaokatalog/parse_song_txt.py
index edb2839..5801b69 100644
--- a/karaokatalog/parse_song_txt.py
+++ b/karaokatalog/parse_song_txt.py
@@ -26,8 +26,27 @@ def _parse_tag_line(tag_line: str) -> tuple[str, str | None]:
     )
 
 
-def parse_song_txt(song_txt: Path) -> dict[str, Any]:
-    with song_txt.open(encoding="utf-8", errors="ignore") as f:
+def _parse_song_txt_with_encoding(song_txt: Path, encoding: str) -> dict[str, Any]:
+    """Parse the "#" tag lines of song_txt, decoded with the given encoding."""
+    with song_txt.open(encoding=encoding) as f:
         tags = dict(_parse_tag_line(line) for line in f if line.startswith("#"))
 
     return tags
+
+
+def parse_song_txt(song_txt: Path) -> dict[str, Any]:
+    """Parse the tags of song_txt, trying each candidate encoding in turn.
+
+    Raises UnicodeError if no candidate encoding can decode the file.
+    """
+    # "utf-8-sig" also decodes BOM-less UTF-8, but strips a leading BOM that
+    # would otherwise hide the first tag line from the startswith("#") check.
+    encodings_to_try = ("utf-8-sig", "cp1252")
+
+    for encoding_to_try in encodings_to_try:
+        try:
+            return _parse_song_txt_with_encoding(song_txt, encoding_to_try)
+        except UnicodeDecodeError:
+            pass
+
+    raise UnicodeError(f"Could not guess encoding for {song_txt}")