70 lines
2.2 KiB
Python
70 lines
2.2 KiB
Python
import argparse
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from tqdm import tqdm
|
|
|
|
from karaokatalog.deduplicate.find_duplicates import find_duplicates
|
|
from karaokatalog.deduplicate.prune import prune
|
|
from karaokatalog.Library import Library
|
|
from karaokatalog.Song import Song
|
|
from karaokatalog.util.get_equivalence_classes import get_equivalence_classes
|
|
|
|
# Configure the root logger once at import time: INFO level and above,
# each record prefixed with a timestamp and its severity.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
|
|
|
|
|
|
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line arguments of the deduplication tool.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse reads ``sys.argv[1:]``, which is
            exactly what this function did before the parameter existed.
            Passing an explicit list lets the parser be driven
            programmatically (e.g. from tests) without touching ``sys.argv``.

    Returns:
        A namespace whose ``library_path`` attribute holds the single
        positional argument converted to a ``pathlib.Path``.

    Raises:
        SystemExit: Raised by argparse after printing a usage message when
            the arguments are invalid or ``library_path`` is missing.
    """
    parser = argparse.ArgumentParser(
        prog="python3 -m karaokatalog.deduplicate",
        description="Deduplicate UltraStar Deluxe song libraries",
    )

    parser.add_argument(
        "library_path",
        type=Path,
        help="The directory which contains the songs, the one you'd also configure UltraStar Deluxe to use",
    )

    # parse_args(None) is argparse's documented way of falling back to sys.argv.
    return parser.parse_args(argv)
|
|
|
|
|
|
def _main() -> None:
    """Run the deduplication pipeline for the library named on the command line.

    Steps, in order:

    1. Parse the command line and load the library from ``library_path``.
    2. Group the library's ``songs_in_unique_dirs`` with ``find_duplicates``.
    3. Split every group into equivalence classes under
       ``Song.has_identic_files``.
    4. Collect the instructions that ``prune`` yields for each class.
    5. Call every collected instruction (per the log messages below, this is
       the step that deletes songs).
    """
    cli_args = parse_args()
    logging.info("Karaokatalog Deduplication started")

    logging.info("Loading library")
    song_library = Library.from_dir(cli_args.library_path)
    logging.info("Library loaded")

    logging.info("Finding duplicates (songs with identical title and artist)")
    duplicate_groups = find_duplicates(song_library.songs_in_unique_dirs)
    logging.info("Duplicates found")

    logging.info("Finding exact duplicates (files are 100% identical)")
    # TODO Make this abortable and restartable
    identical_groups = []
    for candidate_group in tqdm(duplicate_groups.values(), unit=" duplicates"):
        for identical_group in get_equivalence_classes(
            candidate_group, Song.has_identic_files
        ):
            identical_groups.append(identical_group)
    identical_groups = tuple(identical_groups)
    logging.info("Exact duplicates found")

    logging.info("Determining songs to prune")
    collected = []
    for identical_group in tqdm(identical_groups, unit=" exact duplicates"):
        for step in prune(identical_group):
            collected.append(step)
    # Materialised up front so the total is known before anything is executed.
    pruning_instructions = tuple(collected)
    logging.info(
        f"{len(pruning_instructions)} exactly duplicated songs will be deleted"
    )

    logging.warning(f"Deleting {len(pruning_instructions)} songs!")
    for step in tqdm(pruning_instructions, unit=" songs"):
        # Each instruction is a zero-argument callable.
        step()
    logging.info("Deletion done")

    logging.info("Karaokatalog Deduplication done")


if __name__ == "__main__":
    _main()
|