Draft main deduplication code
This commit is contained in:
parent
8ee036e380
commit
59fd6eefb6
46
karaokatalog/deduplicate/__main__.py
Normal file
46
karaokatalog/deduplicate/__main__.py
Normal file
@ -0,0 +1,46 @@
|
||||
from karaokatalog.Library import Library
|
||||
from karaokatalog.Song import Song
|
||||
from karaokatalog.deduplicate.find_duplicates import find_duplicates
|
||||
from karaokatalog.deduplicate.prune import prune
|
||||
from karaokatalog.util.get_equivalence_classes import get_equivalence_classes
|
||||
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
import sys
|
||||
import logging
|
||||
|
||||
# Configure the root logger once for the whole run: INFO verbosity, with each
# line prefixed by a timestamp and the record's level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: load a library from the directory given as the first
    # command-line argument, group its songs into duplicates, narrow those
    # groups down to exact duplicates, and compute the pruning instructions.
    # Nothing is deleted yet (see the TODO near the end).

    # Fail with a usage hint instead of a bare IndexError traceback when the
    # library directory argument is missing. sys.exit with a string writes it
    # to stderr and exits with status 1.
    if len(sys.argv) < 2:
        sys.exit("Usage: python -m karaokatalog.deduplicate <library-dir>")

    logging.info("Karaokatalog Deduplication started")

    logging.info("Loading library")
    library = Library.from_dir(Path(sys.argv[1]))
    logging.info("Library loaded")

    logging.info("Finding duplicates (songs with identical title and artist)")
    # Only .values() of the result is used below; presumably a mapping from a
    # (title, artist) key to the songs sharing it -- confirm in find_duplicates.
    duplicates = find_duplicates(library)
    logging.info("Duplicates found")

    logging.info("Finding exact duplicates (files are 100% identical)")
    # TODO Make this abortable and restartable
    # Flatten: every group of duplicates is split into equivalence classes
    # under Song.has_identic_files, and all classes are collected in one tuple.
    exact_duplicates = tuple(
        equivalence_class
        for songs in tqdm(duplicates.values(), unit=" duplicates")
        for equivalence_class in get_equivalence_classes(songs, Song.has_identic_files)
    )
    logging.info("Exact duplicates found")

    logging.info("Determining songs to prune")
    # Flatten again: prune() yields zero or more instructions per class.
    pruning_instructions = tuple(
        instruction
        for equivalence_class in tqdm(exact_duplicates, unit=" exact duplicates")
        for instruction in prune(equivalence_class)
    )
    logging.info(f"{len(pruning_instructions)} exactly duplicated songs will be deleted")

    # TODO Call all pruning_instructions, to actually delete the files

    logging.info("Karaokatalog Deduplication done")
|
Loading…
x
Reference in New Issue
Block a user