From 59fd6eefb6a3c3424aa769a8ed88cc964dd4a980 Mon Sep 17 00:00:00 2001
From: Jakob Moser
Date: Wed, 21 May 2025 13:43:02 +0200
Subject: [PATCH] Draft main deduplication code

---
Review notes (commentary only; not part of the commit message or the diff):

 * Adds karaokatalog/deduplicate/__main__.py, the script entry point of the
   karaokatalog.deduplicate package. The first command-line argument is
   wrapped in a Path and passed to Library.from_dir.
 * Pipeline, as written in the hunk below:
     1. find_duplicates(library); the log text describes the result as songs
        with identical title and artist.
     2. For every value of that result, get_equivalence_classes(songs,
        Song.has_identic_files); the log text describes these classes as
        files that are 100% identical. All classes are collected in a tuple.
     3. prune(equivalence_class) is iterated for every class and the yielded
        instructions are collected in a tuple; only their count is logged.
 * Nothing is deleted yet: the instructions are never called (see the TODO
   at the end of the hunk), although the log message already says the songs
   "will be deleted".
 * sys.argv[1] is read without checking that an argument was supplied, so
   running the module without one raises IndexError.
 * logging.basicConfig(...) sits outside the `if __name__ == "__main__":`
   guard, so it also runs whenever the module is imported.

 karaokatalog/deduplicate/__main__.py | 46 ++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 karaokatalog/deduplicate/__main__.py

diff --git a/karaokatalog/deduplicate/__main__.py b/karaokatalog/deduplicate/__main__.py
new file mode 100644
index 0000000..8035660
--- /dev/null
+++ b/karaokatalog/deduplicate/__main__.py
@@ -0,0 +1,46 @@
+from karaokatalog.Library import Library
+from karaokatalog.Song import Song
+from karaokatalog.deduplicate.find_duplicates import find_duplicates
+from karaokatalog.deduplicate.prune import prune
+from karaokatalog.util.get_equivalence_classes import get_equivalence_classes
+
+from pathlib import Path
+from tqdm import tqdm
+import sys
+import logging
+
+logging.basicConfig(
+    format="%(asctime)s [%(levelname)s] %(message)s", level=logging.INFO
+)
+
+if __name__ == "__main__":
+    logging.info("Karaokatalog Deduplication started")
+
+    logging.info("Loading library")
+    library = Library.from_dir(Path(sys.argv[1]))
+    logging.info("Library loaded")
+
+    logging.info("Finding duplicates (songs with identical title and artist)")
+    duplicates = find_duplicates(library)
+    logging.info("Duplicates found")
+
+    logging.info("Finding exact duplicates (files are 100% identical)")
+    # TODO Make this abortable and restartable
+    exact_duplicates = tuple(
+        equivalence_class
+        for songs in tqdm(duplicates.values(), unit=" duplicates")
+        for equivalence_class in get_equivalence_classes(songs, Song.has_identic_files)
+    )
+    logging.info("Exact duplicates found")
+
+    logging.info("Determining songs to prune")
+    pruning_instructions = tuple(
+        instruction
+        for equivalence_class in tqdm(exact_duplicates, unit=" exact duplicates")
+        for instruction in prune(equivalence_class)
+    )
+    logging.info(f"{len(pruning_instructions)} exactly duplicated songs will be deleted")
+
+    # TODO Call all pruning_instructions, to actually delete the files
+
+    logging.info("Karaokatalog Deduplication done")