Compare commits
No commits in common. "85ba86596a25c9d9ede9cde0523583071157a707" and "4c6ef77a27dd025db0e33429c36baf0ea45b6d8f" have entirely different histories.
85ba86596a
...
4c6ef77a27
@ -3,7 +3,7 @@ import logging
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from karaokatalog.get_parser import get_parser
|
from karaokatalog.get_parser import get_parser
|
||||||
from karaokatalog.recode.recode import recode
|
from karaokatalog.organize.recode import recode
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO
|
format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO
|
||||||
|
@ -3,36 +3,6 @@ from pathlib import Path
|
|||||||
|
|
||||||
from karaokatalog.instructions.RecodeInstruction import RecodeInstruction
|
from karaokatalog.instructions.RecodeInstruction import RecodeInstruction
|
||||||
|
|
||||||
ENCODINGS_TO_TRY = (
|
|
||||||
"utf-8",
|
|
||||||
"cp1252",
|
|
||||||
)
|
|
||||||
|
|
||||||
BOM = "\ufeff" # Byte Order Mark, totally useless for UTF-8, still sometimes there
|
|
||||||
|
|
||||||
|
|
||||||
def try_reading_file_with_encoding(path: Path, encoding: str) -> str:
|
|
||||||
with path.open("r", encoding=encoding) as f:
|
|
||||||
return f.read()
|
|
||||||
|
|
||||||
|
|
||||||
def guess_encoding(path: Path) -> str:
|
|
||||||
for encoding_to_try in ENCODINGS_TO_TRY:
|
|
||||||
try:
|
|
||||||
content = try_reading_file_with_encoding(path, encoding_to_try)
|
|
||||||
if encoding_to_try == "utf-8" and content.startswith(BOM):
|
|
||||||
return "utf-8-sig"
|
|
||||||
else:
|
|
||||||
return encoding_to_try
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
pass # This was not the right encoding, let's try again
|
|
||||||
|
|
||||||
raise UnicodeError("Could not guess encoding.")
|
|
||||||
|
|
||||||
|
|
||||||
def recode(paths: Sequence[Path]) -> Sequence[RecodeInstruction]:
|
def recode(paths: Sequence[Path]) -> Sequence[RecodeInstruction]:
|
||||||
return [
|
raise NotImplementedError()
|
||||||
RecodeInstruction(path, old_encoding=old_encoding, new_encoding="utf-8")
|
|
||||||
for path in paths
|
|
||||||
if (old_encoding := guess_encoding(path)) != "utf-8"
|
|
||||||
]
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user