Implement recoding
This commit is contained in:
parent
bbdaa34d7c
commit
85ba86596a
@ -3,6 +3,36 @@ from pathlib import Path
|
||||
|
||||
from karaokatalog.instructions.RecodeInstruction import RecodeInstruction
|
||||
|
||||
# Candidate encodings, tried in this order; the first one that decodes a file
# without raising is taken as that file's encoding.
ENCODINGS_TO_TRY = ("utf-8", "cp1252")

# Byte Order Mark: pointless for UTF-8, yet some files still start with it.
BOM = "\ufeff"
|
||||
|
||||
|
||||
def try_reading_file_with_encoding(path: Path, encoding: str) -> str:
    """Read the whole file at ``path`` as text, decoding it with ``encoding``.

    Args:
        path: File to read.
        encoding: Codec name used to decode the file's bytes.

    Returns:
        The complete decoded file content.

    Raises:
        UnicodeDecodeError: If the file's bytes are not valid in ``encoding``.
    """
    return path.read_text(encoding=encoding)
|
||||
|
||||
|
||||
def guess_encoding(path: Path) -> str:
    """Guess the text encoding of the file at ``path``.

    Each entry of ``ENCODINGS_TO_TRY`` is tried in order, and the first one
    that decodes the whole file without error is returned. A file that decodes
    as UTF-8 and begins with a byte order mark is reported as ``"utf-8-sig"``
    rather than ``"utf-8"``.

    Args:
        path: File whose encoding should be guessed.

    Returns:
        The name of the matching encoding.

    Raises:
        UnicodeError: If none of the candidate encodings can decode the file.
    """
    for candidate in ENCODINGS_TO_TRY:
        try:
            text = try_reading_file_with_encoding(path, candidate)
        except UnicodeDecodeError:
            # This candidate cannot decode the file; move on to the next one.
            continue

        starts_with_bom = candidate == "utf-8" and text.startswith(BOM)
        return "utf-8-sig" if starts_with_bom else candidate

    raise UnicodeError("Could not guess encoding.")
|
||||
|
||||
|
||||
def recode(paths: Sequence[Path]) -> Sequence[RecodeInstruction]:
    """Build recode instructions for every file that is not plain UTF-8.

    The encoding of each path is guessed with ``guess_encoding``. Files guessed
    as exactly ``"utf-8"`` are skipped; every other file (including UTF-8 with
    a byte order mark, reported as ``"utf-8-sig"``) gets an instruction to
    convert it from the guessed encoding to ``"utf-8"``.

    Args:
        paths: Files to inspect.

    Returns:
        One ``RecodeInstruction`` per file that needs recoding, in input order.

    Raises:
        UnicodeError: Propagated from ``guess_encoding`` when a file's
            encoding cannot be determined.
    """
    # The former ``raise NotImplementedError()`` stub was removed: it sat
    # before the return and made the implementation below unreachable.
    return [
        RecodeInstruction(path, old_encoding=old_encoding, new_encoding="utf-8")
        for path in paths
        if (old_encoding := guess_encoding(path)) != "utf-8"
    ]
|
||||
|
Loading…
x
Reference in New Issue
Block a user