# karaokatalog/deduplicate/prune.py
import re
from collections.abc import Sequence

from karaokatalog.deduplicate.instructions.DeleteInstruction import DeleteInstruction
from karaokatalog.Song import Song

# A song whose directory matches this pattern is only kept when no equivalent
# song lives outside of such a directory. Because the pattern is used with
# ``search``, the optional trailing ``(Unsortiert/)?`` group does not change
# which paths match.
# NOTE(review): the pattern uses forward slashes and is matched against
# ``str(song.dir)`` -- confirm directories are always rendered POSIX-style.
DISCOURAGED_DIR_PATTERN = re.compile(r"/UltrastarDX/Ultrastar DX/(Unsortiert/)?")


def prune(equivalent_songs: Sequence[Song]) -> Sequence[DeleteInstruction]:
    """
    Prune a sequence of equivalent songs, by returning a sequence of instructions that
    delete all but one song in this sequence.

    The song that is kept is the first one whose directory does not match
    ``DISCOURAGED_DIR_PATTERN``. If every song lives in a discouraged directory,
    the first song of the sequence is kept instead.

    Returns a tuple with one ``DeleteInstruction`` (built from ``song.dir``) for
    every song other than the kept one. An empty input yields an empty tuple.
    """
    # Nothing to prune; without this guard the fallback below would raise IndexError.
    if not equivalent_songs:
        return ()

    # First preferred song in input order; when there is none, every song is
    # discouraged, so the first discouraged song is simply the first song.
    song_to_keep = next(
        (
            song
            for song in equivalent_songs
            if not DISCOURAGED_DIR_PATTERN.search(str(song.dir))
        ),
        equivalent_songs[0],
    )

    # Identity (not equality) comparison: only the chosen object itself is spared.
    return tuple(
        DeleteInstruction(song.dir)
        for song in equivalent_songs
        if song is not song_to_keep
    )