Add skeletons for recoding

This commit is contained in:
Jakob Moser 2025-06-01 16:23:35 +02:00
parent 92a001bf52
commit fa3df3d2e2
Signed by: jakob
GPG Key ID: 3EF2BA2851B3F53C
5 changed files with 69 additions and 0 deletions

View File

@ -4,6 +4,7 @@ Tools to manage an Ultrastar DX song library. Features include:
1. Deduplication
2. Organization
3. Recoding
## Setup
@ -42,3 +43,15 @@ python3 -m karaokatalog.deduplicate $SONG_LIBRARY
```bash
python3 -m karaokatalog.organize $SONG_LIBRARY
```
### Recode
**Re-encode all txt files into UTF-8.** This will use normal UTF-8, i.e., UTF-8 without BOM. If the txt uses UTF-8 with BOM, BOM is removed.
⚠️ This will _irreversibly_ change the encoding of the txt files (based on a guessed encoding).
Recoding is risk-reduced: We only change the encoding if we are reasonably certain our guessed encoding is correct. However, we could still make mistakes when detecting the encoding.
```bash
python3 -m karaokatalog.recode $SONG_LIBRARY
```

View File

@ -0,0 +1,18 @@
from dataclasses import dataclass
from pathlib import Path
from karaokatalog.instructions.Instruction import Instruction
@dataclass(frozen=True)
class RecodeInstruction(Instruction):
    """
    Open the file at the given path with the given old encoding, and save it with the given new encoding.

    The file is rewritten in place. Only the byte encoding changes; the decoded
    text (including its line endings) is written back unchanged.
    """

    # File that is re-encoded in place.
    path: Path
    # Name of the codec the file's bytes are currently encoded with.
    old_encoding: str
    # Name of the codec the file is rewritten with.
    new_encoding: str

    def __call__(self) -> None:
        """
        Re-encode the file at ``path`` from ``old_encoding`` to ``new_encoding``.

        Raises:
            OSError: If the file cannot be read or written.
            LookupError: If either encoding name is unknown to Python.
            UnicodeDecodeError: If the file's bytes are not valid ``old_encoding``.
            UnicodeEncodeError: If the text cannot be represented in ``new_encoding``.
        """
        # Decode from raw bytes instead of opening in text mode, so that no
        # newline translation happens and the line endings stay as they were.
        text = self.path.read_bytes().decode(self.old_encoding)

        # Encode *before* touching the file: if either conversion fails, the
        # exception is raised while the original file is still intact.
        recoded = text.encode(self.new_encoding)

        self.path.write_bytes(recoded)

View File

View File

@ -0,0 +1,30 @@
# Command-line entry point: re-encode all txt files of a song library into UTF-8.
import logging

from tqdm import tqdm

from karaokatalog.get_parser import get_parser

# `recode` belongs to this package (karaokatalog.recode), not to
# karaokatalog.organize, so it has to be imported from here.
from karaokatalog.recode.recode import recode

logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO
)

if __name__ == "__main__":
    args = get_parser(
        "recode", "Recode all txt files in the directory into UTF-8"
    ).parse_args()

    logging.info("Recoding started")

    # Collect every txt file below the library root up front; wrapping the
    # generator in tqdm shows a running count while the tree is walked.
    logging.info("Finding *.txt files")
    txt_paths = list(tqdm(args.library_path.rglob("*.txt"), unit=" files"))
    logging.info(f"{len(txt_paths)} txt files found")

    # Planning step: decide which files get recoded; nothing is modified yet.
    logging.info("Generating recode instructions")
    recode_instructions = recode(txt_paths)
    logging.info(f"{len(recode_instructions)} recode instructions generated")

    # Execution step: each instruction modifies one file when called, hence
    # the warning-level message right before the loop.
    logging.warning(f"Recoding {len(recode_instructions)} files!")
    for instruction in tqdm(recode_instructions, unit=" files"):
        instruction()

    logging.info("Recoding done")

View File

@ -0,0 +1,8 @@
from collections.abc import Sequence
from pathlib import Path
from karaokatalog.instructions.RecodeInstruction import RecodeInstruction
def recode(paths: Sequence[Path]) -> Sequence[RecodeInstruction]:
    """
    Skeleton for turning the given paths into recode instructions.

    Not implemented yet: every call raises NotImplementedError, regardless of
    the paths passed in.
    """
    raise NotImplementedError