diff --git a/karaokatalog/parse_song_txt.py b/karaokatalog/parse_song_txt.py
index edb2839..5801b69 100644
--- a/karaokatalog/parse_song_txt.py
+++ b/karaokatalog/parse_song_txt.py
@@ -26,8 +26,37 @@ def _parse_tag_line(tag_line: str) -> tuple[str, str | None]:
     )
 
 
-def parse_song_txt(song_txt: Path) -> dict[str, Any]:
-    with song_txt.open(encoding="utf-8", errors="ignore") as f:
+def _parse_song_txt_with_encoding(song_txt: Path, encoding: str) -> dict[str, Any]:
+    """Build the tag dict of song_txt, decoding the file strictly as encoding.
+
+    Only lines starting with "#" are passed to _parse_tag_line.
+
+    Raises:
+        UnicodeDecodeError: If the file content is not valid in encoding.
+    """
+    with song_txt.open(encoding=encoding) as f:
         tags = dict(_parse_tag_line(line) for line in f if line.startswith("#"))
 
     return tags
+
+
+def parse_song_txt(song_txt: Path) -> dict[str, Any]:
+    """Parse the tags of song_txt, trying each candidate encoding in order.
+
+    Raises:
+        UnicodeError: If none of the candidate encodings can decode the file.
+    """
+    # "utf-8-sig" also decodes BOM-less UTF-8, but additionally strips a leading
+    # byte order mark. With plain "utf-8" the BOM stays attached to the first
+    # line, which then fails the startswith("#") check and loses its tag.
+    encodings_to_try = ("utf-8-sig", "cp1252")
+    last_error: UnicodeDecodeError | None = None
+
+    for encoding_to_try in encodings_to_try:
+        try:
+            return _parse_song_txt_with_encoding(song_txt, encoding_to_try)
+        except UnicodeDecodeError as error:
+            # Wrong guess: keep the reason and fall through to the next one.
+            last_error = error
+
+    raise UnicodeError(f"Could not guess encoding for {song_txt}") from last_error