diff --git a/README.md b/README.md index 67d5615..41594f7 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ Tools to manage an Ultrastar DX song library. Features include: 1. Deduplication 2. Organization +3. Recoding ## Setup @@ -42,3 +43,15 @@ python3 -m karaokatalog.deduplicate $SONG_LIBRARY ```bash python3 -m karaokatalog.organize $SONG_LIBRARY ``` + +### Recode + +**Re-encode all txt files into UTF-8.** This will use normal UTF-8, i.e., UTF-8 without BOM. If a txt file uses UTF-8 with BOM, the BOM is removed. + +⚠️ This will _irreversibly_ change the encoding of the txt files (based on a guessed encoding). + +ℹ️ Recoding is risk-reduced: We only change the encoding if we are reasonably certain our guessed encoding is correct. However, we could still make mistakes when detecting. + +```bash +python3 -m karaokatalog.recode $SONG_LIBRARY +``` diff --git a/karaokatalog/instructions/RecodeInstruction.py b/karaokatalog/instructions/RecodeInstruction.py new file mode 100644 index 0000000..1d6df4d --- /dev/null +++ b/karaokatalog/instructions/RecodeInstruction.py @@ -0,0 +1,18 @@ +from dataclasses import dataclass +from pathlib import Path + +from karaokatalog.instructions.Instruction import Instruction + + +@dataclass(frozen=True) +class RecodeInstruction(Instruction): + """ + Open the file at the given path with the given old encoding, and save it with the given new encoding.
+ """ + + path: Path + old_encoding: str + new_encoding: str + + def __call__(self) -> None: + raise NotImplementedError() diff --git a/karaokatalog/recode/__init__.py b/karaokatalog/recode/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/karaokatalog/recode/__main__.py b/karaokatalog/recode/__main__.py new file mode 100644 index 0000000..bdf31d4 --- /dev/null +++ b/karaokatalog/recode/__main__.py @@ -0,0 +1,30 @@ +import logging + +from tqdm import tqdm + +from karaokatalog.get_parser import get_parser +from karaokatalog.organize.recode import recode + +logging.basicConfig( + format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO +) + + +if __name__ == "__main__": + args = get_parser( + "recode", "Recode all txt files in the directory into UTF-8" + ).parse_args() + logging.info("Recoding started") + + logging.info("Finding *.txt files") + txt_paths = list(tqdm(args.library_path.rglob("*.txt"), unit=" files")) + logging.info(f"{len(txt_paths)} txt files found") + + logging.info("Generating recode instructions") + recode_instructions = recode(txt_paths) + logging.info(f"{len(recode_instructions)} recode instructions generated") + + logging.warning(f"Recoding {len(recode_instructions)} files!") + for instruction in tqdm(recode_instructions, unit=" files"): + instruction() + logging.info("Recoding done") diff --git a/karaokatalog/recode/recode.py b/karaokatalog/recode/recode.py new file mode 100644 index 0000000..633d3f7 --- /dev/null +++ b/karaokatalog/recode/recode.py @@ -0,0 +1,8 @@ +from collections.abc import Sequence +from pathlib import Path + +from karaokatalog.instructions.RecodeInstruction import RecodeInstruction + + +def recode(paths: Sequence[Path]) -> Sequence[RecodeInstruction]: + raise NotImplementedError()