Compare commits
10 Commits
6e982950b4
...
8676b00ae3
Author | SHA1 | Date | |
---|---|---|---|
8676b00ae3 | |||
21a0764fe2 | |||
424942429d | |||
da023a5ae2 | |||
ddc1448acc | |||
af7fcbb4c4 | |||
25f56619d9 | |||
4250773a09 | |||
f62f076567 | |||
5232a12738 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
__pycache__/
|
||||
venv/
|
||||
.vscode/
|
||||
instance/
|
||||
|
@ -1,5 +1,6 @@
|
||||
from collections.abc import Sequence
|
||||
from dataclasses import dataclass
|
||||
from functools import cached_property
|
||||
from pathlib import Path
|
||||
from typing import Self
|
||||
|
||||
@ -31,3 +32,22 @@ class Library:
|
||||
)
|
||||
|
||||
return cls(songs, unparseable_song_txts)
|
||||
|
||||
@cached_property
def songs_in_unique_dirs(self) -> Sequence[Song]:
    """
    Return the library's songs reduced to at most one song per base directory.

    Several songs may live in the same directory (typically variants of a song that share one audio file).
    Deduplication is easier when each song can be treated as owning its directory, so for every directory
    only the first song encountered in ``self.songs`` is kept; which variant that is, is arbitrary.
    """
    # Dicts preserve insertion order and setdefault never overwrites, so each directory
    # maps to the first song seen for it, in the order directories first appear.
    first_song_by_dir: dict[Path, Song] = {}

    for candidate in self.songs:
        first_song_by_dir.setdefault(candidate.dir, candidate)

    return tuple(first_song_by_dir.values())
|
||||
|
@ -39,9 +39,8 @@ if __name__ == "__main__":
|
||||
logging.info("Library loaded")
|
||||
|
||||
logging.info("Finding duplicates (songs with identical title and artist)")
|
||||
# TODO Ensure we only have one song per directory
|
||||
duplicates = find_duplicates(library.songs)
|
||||
logging.info("Duplicates found")
|
||||
duplicates = find_duplicates(library.songs_in_unique_dirs)
|
||||
logging.info(f"{len(duplicates)} duplicate sets found")
|
||||
|
||||
logging.info("Finding exact duplicates (files are 100% identical)")
|
||||
# TODO Make this abortable and restartable
|
||||
@ -50,7 +49,7 @@ if __name__ == "__main__":
|
||||
for songs in tqdm(duplicates.values(), unit=" duplicates")
|
||||
for equivalence_class in get_equivalence_classes(songs, Song.has_identic_files)
|
||||
)
|
||||
logging.info("Exact duplicates found")
|
||||
logging.info(f"{len(exact_duplicates)} exact duplicate sets found")
|
||||
|
||||
logging.info("Determining songs to prune")
|
||||
pruning_instructions = tuple(
|
||||
|
@ -1,3 +1,4 @@
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
@ -13,4 +14,7 @@ class DeleteInstruction(Instruction):
|
||||
path_to_delete: Path
|
||||
|
||||
def __call__(self) -> None:
    """
    Delete ``self.path_to_delete`` from the file system.

    Regular files and symbolic links are unlinked; real directories are removed
    recursively together with their contents.

    Raises:
        FileNotFoundError: If the path does not exist.
        OSError: If the underlying removal fails (e.g. missing permissions).
    """
    target = self.path_to_delete

    # Decide up front instead of catching IsADirectoryError: unlinking a directory only raises
    # IsADirectoryError on Linux, while macOS and Windows report PermissionError instead.
    # Symlinks to directories are unlinked (not followed), since rmtree refuses to operate on symlinks.
    if target.is_dir() and not target.is_symlink():
        shutil.rmtree(target)
    else:
        target.unlink()
|
||||
|
@ -1,12 +1,34 @@
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
|
||||
from karaokatalog.deduplicate.instructions.DeleteInstruction import DeleteInstruction
|
||||
from karaokatalog.Song import Song
|
||||
|
||||
# Matches song directories whose path contains "/UltrastarDX/Ultrastar DX/", optionally followed by
# its "Unsortiert/" subfolder. prune() treats songs in matching directories as discouraged and keeps
# one of them only when no copy outside these directories exists.
DISCOURAGED_DIR_PATTERN = re.compile(r"/UltrastarDX/Ultrastar DX/(Unsortiert/)?")
|
||||
|
||||
|
||||
def prune(equivalent_songs: Sequence[Song]) -> Sequence[DeleteInstruction]:
    """
    Prune a sequence of equivalent songs, by returning a sequence of instructions
    that delete all but one song in this sequence.

    The song that is kept is the first one whose directory does not match DISCOURAGED_DIR_PATTERN;
    if every song is in a discouraged directory, the first song of the sequence is kept instead.

    Returns an empty sequence when there are no songs. The directory of the kept song is never
    scheduled for deletion, and every other directory is scheduled at most once, even when
    several songs share it.
    """
    if not equivalent_songs:
        # Nothing to keep and nothing to delete.
        return ()

    # Keep an arbitrarily chosen preferred song, if there is at least one, otherwise, just keep an
    # arbitrarily chosen discouraged song (when all songs are discouraged, that is the first one).
    song_to_keep = next(
        (
            song
            for song in equivalent_songs
            if not DISCOURAGED_DIR_PATTERN.search(str(song.dir))
        ),
        equivalent_songs[0],
    )

    # Instructions delete whole directories, so work on directories rather than songs:
    # skipping the kept directory protects the kept song from variants stored next to it,
    # and dict.fromkeys drops repeated directories while preserving their order.
    dirs_to_delete = dict.fromkeys(
        song.dir for song in equivalent_songs if song.dir != song_to_keep.dir
    )

    return tuple(DeleteInstruction(dir_to_delete) for dir_to_delete in dirs_to_delete)
|
||||
|
Loading…
x
Reference in New Issue
Block a user