Add basic method to find duplicates
This commit is contained in:
parent
b30e579586
commit
8988bb820c
34
karaokatalog/find_duplicates.py
Normal file
34
karaokatalog/find_duplicates.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
from collections.abc import Mapping, Sequence
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from karaokatalog.Library import Library
|
||||||
|
from karaokatalog.Song import Song, Title, Artist
|
||||||
|
|
||||||
|
# A (title, artist) pair; find_duplicates uses it as the key under which songs are grouped.
type TitleAndArtist = tuple[Title, Artist]
|
||||||
|
|
||||||
|
|
||||||
|
def find_duplicates(l: Library) -> Mapping[TitleAndArtist, Sequence[Song]]:
    """
    Group the songs of a library by (title, artist) and keep only the groups
    that contain more than one song.

    Songs in the same group are not guaranteed to be interchangeable, e.g.:

    - their tracks may differ (a music video versus audio only),
    - their lyrics or notes may differ (one may be crafted more carefully),
    - two unrelated artists may share a name and both have released a song
      with the same title,
    - ...

    Progress over ``l.songs`` is displayed with tqdm.

    Returns a mapping from each (title, artist) pair that occurs more than
    once to the songs carrying that pair, in the order they were encountered.
    """
    groups: dict[TitleAndArtist, list[Song]] = {}
    for song in tqdm(l.songs, unit=" songs"):
        key = (song.title, song.artist)
        groups.setdefault(key, []).append(song)

    duplicates: dict[TitleAndArtist, list[Song]] = {}
    for key, group in groups.items():
        # A group with a single member has no duplicate.
        if len(group) < 2:
            continue
        duplicates[key] = group
    return duplicates
|
Loading…
x
Reference in New Issue
Block a user