Compare commits

...

2 Commits

Author SHA1 Message Date
fa3df3d2e2
Add skeletons for recoding 2025-06-01 16:23:35 +02:00
92a001bf52
Reformat file 2025-06-01 16:22:49 +02:00
6 changed files with 72 additions and 1 deletions

View File

@ -4,6 +4,7 @@ Tools to manage an Ultrastar DX song library. Features include:
1. Deduplication
2. Organization
3. Recoding
## Setup
@ -42,3 +43,15 @@ python3 -m karaokatalog.deduplicate $SONG_LIBRARY
```bash
python3 -m karaokatalog.organize $SONG_LIBRARY
```
### Recode
**Re-encode all txt files into UTF-8.** This will use normal UTF-8, i.e., UTF-8 without BOM. If the txt uses UTF-8 with BOM, BOM is removed.
⚠️ This will _irreversibly_ change the encoding of the txt files (based on a guessed encoding).
Recoding is risk-reduced: We only change the encoding if we are reasonably certain our guessed encoding is correct. However, we could still make mistakes when detecting.
```bash
python3 -m karaokatalog.recode $SONG_LIBRARY
```

View File

@ -0,0 +1,18 @@
from dataclasses import dataclass
from pathlib import Path
from karaokatalog.instructions.Instruction import Instruction
@dataclass(frozen=True)
class RecodeInstruction(Instruction):
    """
    Instruction to re-encode a single file in place.

    Intended contract: read the file at ``path`` using ``old_encoding`` and
    write it back using ``new_encoding``.

    This is currently a skeleton: calling an instance always raises
    ``NotImplementedError``.
    """

    # Location of the file to be re-encoded.
    path: Path
    # Encoding the file is to be opened with.
    old_encoding: str
    # Encoding the file is to be saved with.
    new_encoding: str

    def __call__(self) -> None:
        """Execute the instruction (not implemented yet)."""
        raise NotImplementedError()

View File

@ -30,7 +30,9 @@ if __name__ == "__main__":
try:
instruction()
except FileExistsError:
logging.error(f"Could not move {instruction.old_path} -> {instruction.new_path}")
logging.error(
f"Could not move {instruction.old_path} -> {instruction.new_path}"
)
logging.info("Moving done")
logging.info("Karaokatalog Organization done")

View File

View File

@ -0,0 +1,30 @@
import logging
from tqdm import tqdm
from karaokatalog.get_parser import get_parser
from karaokatalog.organize.recode import recode
# Configure logging for the command line run: timestamp, level, message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)

if __name__ == "__main__":
    # Parse the command line; the resulting namespace supplies ``library_path``.
    parser = get_parser(
        "recode", "Recode all txt files in the directory into UTF-8"
    )
    args = parser.parse_args()

    logging.info("Recoding started")

    # Step 1: recursively collect every *.txt file below the library path.
    # The result is materialized into a list so it can be counted.
    logging.info("Finding *.txt files")
    txt_paths = list(tqdm(args.library_path.rglob("*.txt"), unit=" files"))
    logging.info(f"{len(txt_paths)} txt files found")

    # Step 2: turn the collected paths into recode instructions.
    logging.info("Generating recode instructions")
    recode_instructions = recode(txt_paths)
    logging.info(f"{len(recode_instructions)} recode instructions generated")

    # Step 3: execute the instructions one by one. The warning is emitted
    # right before the instructions are actually run.
    logging.warning(f"Recoding {len(recode_instructions)} files!")
    for recode_instruction in tqdm(recode_instructions, unit=" files"):
        recode_instruction()

    logging.info("Recoding done")

View File

@ -0,0 +1,8 @@
from collections.abc import Sequence
from pathlib import Path
from karaokatalog.instructions.RecodeInstruction import RecodeInstruction
def recode(paths: Sequence[Path]) -> Sequence[RecodeInstruction]:
    """
    Build the recode instructions for the given file paths.

    This is currently a skeleton: it always raises ``NotImplementedError``.

    NOTE(review): presumably this should return one instruction per file
    whose encoding needs to change -- confirm once implemented.
    """
    raise NotImplementedError()