GeniusLyrics: update to parse latest HTML of returned lyrics,
devolving the removal of various crud to `HtmlLyricsProvider`; log initial query and use new `StartsOrEndsMatch()` static to match JSON replies, log each request, and break if full match; `StartsOrEndsMatch()` ignores some common punctuation variations, normalizes single quotes, and allows a match at the beginning or end. HtmlLyricsProvider: fix `multiple` mode not to terminate on first batch, and defer processing until the whole HTML is available (avoids issues with tags spanning batches); add param to take list of regular expressions to remove from HTML prior to general processing (used only by `GeniusLyrics` for now). README.md etc: update list of lyrics providers supported.
This commit is contained in:
@@ -53,7 +53,7 @@ Funding developers is a way to contribute to open source projects you appreciate
|
||||
* Edit tags on audio files
|
||||
* Fetch tags from MusicBrainz
|
||||
* Album cover art from [Last.fm](https://www.last.fm/), [Musicbrainz](https://musicbrainz.org/), [Discogs](https://www.discogs.com/), [Musixmatch](https://www.musixmatch.com/), [Deezer](https://www.deezer.com/), [Tidal](https://www.tidal.com/), [Qobuz](https://www.qobuz.com/) and [Spotify](https://www.spotify.com/)
|
||||
* Song lyrics from [Genius](https://genius.com/), [Musixmatch](https://www.musixmatch.com/), [ChartLyrics](http://www.chartlyrics.com/), [lyrics.ovh](https://lyrics.ovh/), [lololyrics.com](https://www.lololyrics.com/), [songlyrics.com](https://www.songlyrics.com/), [azlyrics.com](https://www.azlyrics.com/) and [elyrics.net](https://www.elyrics.net/)
|
||||
* Song lyrics from [Genius](https://genius.com/), [Musixmatch](https://www.musixmatch.com/), [ChartLyrics](http://www.chartlyrics.com/), [lyrics.ovh](https://lyrics.ovh/), [lololyrics.com](https://www.lololyrics.com/), [songlyrics.com](https://www.songlyrics.com/), [azlyrics.com](https://www.azlyrics.com/), [elyrics.net](https://www.elyrics.net/), [letras.mus.br](https://www.letras.mus.br) and [LyricFind](https://lyrics.lyricfind.com)
|
||||
* Support for multiple backends
|
||||
* Audio analyzer
|
||||
* Audio equalizer
|
||||
|
||||
2
debian/control
vendored
2
debian/control
vendored
@@ -60,7 +60,7 @@ Description: music player and music collection organizer
|
||||
- Edit tags on audio files
|
||||
- Automatically retrieve tags from MusicBrainz
|
||||
- Album cover art from Last.fm, Musicbrainz, Discogs, Musixmatch, Deezer, Tidal, Qobuz and Spotify
|
||||
- Song lyrics from Genius, Musixmatch, ChartLyrics, lyrics.ovh, lololyrics.com, songlyrics.com, azlyrics.com and elyrics.net
|
||||
- Song lyrics from Genius, Musixmatch, ChartLyrics, lyrics.ovh, lololyrics.com, songlyrics.com, azlyrics.com, elyrics.net, letras.mus.br and LyricFind
|
||||
- Audio analyzer
|
||||
- Audio equalizer
|
||||
- Transfer music to mass-storage USB players, MTP compatible devices and iPod Nano/Classic
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
<li>Edit tags on audio files</li>
|
||||
<li>Automatically retrieve tags from MusicBrainz</li>
|
||||
<li>Album cover art from Last.fm, Musicbrainz, Discogs, Musixmatch, Deezer, Tidal, Qobuz and Spotify</li>
|
||||
<li>Song lyrics from Genius, Musixmatch, ChartLyrics, lyrics.ovh, lololyrics.com, songlyrics.com, azlyrics.com and elyrics.net</li>
|
||||
<li>Song lyrics from Genius, Musixmatch, ChartLyrics, lyrics.ovh, lololyrics.com, songlyrics.com, azlyrics.com, elyrics.net, letras.mus.br and LyricFind</li>
|
||||
<li>Audio analyzer and equalizer</li>
|
||||
<li>Transfer music to mass-storage USB players, MTP compatible devices and iPod Nano/Classic</li>
|
||||
<li>Scrobbler with support for Last.fm, Libre.fm and ListenBrainz</li>
|
||||
|
||||
2
dist/unix/strawberry.1
vendored
2
dist/unix/strawberry.1
vendored
@@ -29,7 +29,7 @@ Features:
|
||||
.br
|
||||
- Album cover art from Last.fm, Musicbrainz, Discogs, Musixmatch, Deezer, Tidal, Qobuz and Spotify
|
||||
.br
|
||||
- Song lyrics from Lyrics.com, Genius, Musixmatch, ChartLyrics, lyrics.ovh and lololyrics.com
|
||||
- Song lyrics from Genius, Musixmatch, ChartLyrics, lyrics.ovh, lololyrics.com, songlyrics.com, azlyrics.com, elyrics.net, letras.mus.br and LyricFind
|
||||
.br
|
||||
- Support for multiple backends
|
||||
.br
|
||||
|
||||
2
dist/unix/strawberry.spec.in
vendored
2
dist/unix/strawberry.spec.in
vendored
@@ -93,7 +93,7 @@ Features:
|
||||
- Edit tags on audio files
|
||||
- Automatically retrieve tags from MusicBrainz
|
||||
- Album cover art from Last.fm, Musicbrainz, Discogs, Musixmatch, Deezer, Tidal, Qobuz and Spotify
|
||||
- Song lyrics from Genius, Musixmatch, ChartLyrics, lyrics.ovh, lololyrics.com, songlyrics.com, azlyrics.com and elyrics.net
|
||||
- Song lyrics from Genius, Musixmatch, ChartLyrics, lyrics.ovh, lololyrics.com, songlyrics.com, azlyrics.com, elyrics.net, letras.mus.br and LyricFind
|
||||
- Support for multiple backends
|
||||
- Audio analyzer
|
||||
- Audio equalizer
|
||||
|
||||
@@ -148,6 +148,8 @@ void GeniusLyricsProvider::StartSearch(const int id, const LyricsSearchRequest &
|
||||
QNetworkReply *reply = CreateGetRequest(QUrl(QLatin1String(kUrlSearch)), url_query);
|
||||
QObject::connect(reply, &QNetworkReply::finished, this, [this, reply, id]() { HandleSearchReply(reply, id); });
|
||||
|
||||
qLog(Debug) << name_ << "Sending request for" << url_query.query();
|
||||
|
||||
}
|
||||
|
||||
GeniusLyricsProvider::JsonObjectResult GeniusLyricsProvider::ParseJsonObject(QNetworkReply *reply) {
|
||||
@@ -302,10 +304,8 @@ void GeniusLyricsProvider::HandleSearchReply(QNetworkReply *reply, const int id)
|
||||
const QString artist = primary_artist["name"_L1].toString();
|
||||
const QString title = object_result["title"_L1].toString();
|
||||
|
||||
// Ignore results where both the artist and title don't match.
|
||||
if (!artist.startsWith(search->request.albumartist, Qt::CaseInsensitive) &&
|
||||
!artist.startsWith(search->request.artist, Qt::CaseInsensitive) &&
|
||||
!title.startsWith(search->request.title, Qt::CaseInsensitive)) {
|
||||
// Ignore results where the artist or title don't begin or end the same
|
||||
if (!StartsOrEndsMatch(artist, search->request.artist) || !StartsOrEndsMatch(title, search->request.title)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -323,6 +323,12 @@ void GeniusLyricsProvider::HandleSearchReply(QNetworkReply *reply, const int id)
|
||||
QNetworkReply *new_reply = CreateGetRequest(url);
|
||||
QObject::connect(new_reply, &QNetworkReply::finished, this, [this, new_reply, search, url]() { HandleLyricReply(new_reply, search->id, url); });
|
||||
|
||||
qLog(Debug) << name_ << "Sending request for" << url;
|
||||
|
||||
// If full match, don't bother iterating further
|
||||
if (artist == search->request.albumartist && artist == search->request.artist && title == search->request.title) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -363,12 +369,18 @@ void GeniusLyricsProvider::HandleLyricReply(QNetworkReply *reply, const int sear
|
||||
return;
|
||||
}
|
||||
|
||||
const QString content = QString::fromUtf8(data);
|
||||
QString lyrics = HtmlLyricsProvider::ParseLyricsFromHTML(content, QRegularExpression(u"<div[^>]*>"_s), QRegularExpression(u"<\\/div>"_s), QRegularExpression(u"<div data-lyrics-container=[^>]+>"_s), true);
|
||||
if (lyrics.isEmpty()) {
|
||||
lyrics = HtmlLyricsProvider::ParseLyricsFromHTML(content, QRegularExpression(u"<div[^>]*>"_s), QRegularExpression(u"<\\/div>"_s), QRegularExpression(u"<div class=\"lyrics\">"_s), true);
|
||||
}
|
||||
static const QRegularExpression start_tag(u"<div[^>]*>"_s);
|
||||
static const QRegularExpression end_tag(u"<\\/div>"_s);
|
||||
static const QRegularExpression lyrics_start(u"<div data-lyrics-container=[^>]+>"_s);
|
||||
|
||||
static const QRegularExpression regex_html_tag_span_trans(u"<span class=\"LyricsHeader__Translations[^>]*>[^<]*</span>"_s);
|
||||
static const QRegularExpression regex_html_tag_div_ellipsis(u"<div class=\"LyricsHeader__TextEllipsis[^>]*>[^<]*</div>"_s);
|
||||
static const QRegularExpression regex_html_tag_span_contribs(u"<span class=\"ContributorsCreditSong__Contributors[^>]*>[^<]*</span>"_s);
|
||||
static const QRegularExpression regex_html_tag_div_bio(u"<div class=\"SongBioPreview__Container[^>]*>.*?</div>"_s);
|
||||
static const QRegularExpression regex_html_tag_h2(u"<h2 [^>]*>[^<]*</h2>"_s);
|
||||
static const QList<QRegularExpression> regex_removes{ regex_html_tag_span_trans, regex_html_tag_div_ellipsis, regex_html_tag_span_contribs, regex_html_tag_div_bio, regex_html_tag_h2 };
|
||||
|
||||
const QString lyrics = HtmlLyricsProvider::ParseLyricsFromHTML(QString::fromUtf8(data), start_tag, end_tag, lyrics_start, true, regex_removes);
|
||||
if (!lyrics.isEmpty()) {
|
||||
LyricsSearchResult result(lyrics);
|
||||
result.artist = lyric.artist;
|
||||
@@ -404,3 +416,17 @@ void GeniusLyricsProvider::EndSearch(const int id, const LyricsSearchRequest &re
|
||||
Q_EMIT SearchFinished(id, results);
|
||||
|
||||
}
|
||||
|
||||
bool GeniusLyricsProvider::StartsOrEndsMatch(QString s, QString t) {
|
||||
|
||||
constexpr Qt::CaseSensitivity cs = Qt::CaseInsensitive;
|
||||
|
||||
static const QRegularExpression puncts_regex(u"[!,.:;]"_s);
|
||||
static const QRegularExpression quotes_regex(u"[’‘´`]"_s);
|
||||
|
||||
s.remove(puncts_regex).replace(quotes_regex, u"'"_s);
|
||||
t.remove(puncts_regex).replace(quotes_regex, u"'"_s);
|
||||
|
||||
return (s.compare(t, cs) == 0 && !s.isEmpty()) || (!s.isEmpty() && !t.isEmpty() && (s.startsWith(t, cs) || t.startsWith(s, cs) || s.endsWith(t, cs) || t.endsWith(s, cs)));
|
||||
|
||||
}
|
||||
|
||||
@@ -79,6 +79,9 @@ class GeniusLyricsProvider : public JsonLyricsProvider {
|
||||
void HandleSearchReply(QNetworkReply *reply, const int id);
|
||||
void HandleLyricReply(QNetworkReply *reply, const int search_id, const QUrl &url);
|
||||
|
||||
private:
|
||||
static bool StartsOrEndsMatch(QString s, QString t);
|
||||
|
||||
private:
|
||||
OAuthenticator *oauth_;
|
||||
mutable QMutex mutex_access_token_;
|
||||
|
||||
@@ -109,7 +109,7 @@ void HtmlLyricsProvider::HandleLyricsReply(QNetworkReply *reply, const int id, c
|
||||
|
||||
}
|
||||
|
||||
QString HtmlLyricsProvider::ParseLyricsFromHTML(const QString &content, const QRegularExpression &start_tag, const QRegularExpression &end_tag, const QRegularExpression &lyrics_start, const bool multiple) {
|
||||
QString HtmlLyricsProvider::ParseLyricsFromHTML(const QString &content, const QRegularExpression &start_tag, const QRegularExpression &end_tag, const QRegularExpression &lyrics_start, const bool multiple, const QList<QRegularExpression> &regex_removes) {
|
||||
|
||||
Q_ASSERT(QThread::currentThread() != qApp->thread());
|
||||
|
||||
@@ -153,29 +153,30 @@ QString HtmlLyricsProvider::ParseLyricsFromHTML(const QString &content, const QR
|
||||
if (!lyrics.isEmpty()) {
|
||||
lyrics.append(u'\n');
|
||||
}
|
||||
lyrics.append(content.mid(start_lyrics_idx, end_lyrics_idx - start_lyrics_idx).remove(u'\r').remove(u'\n'));
|
||||
}
|
||||
|
||||
}
|
||||
while (start_idx > 0 && multiple);
|
||||
|
||||
for (auto it = regex_removes.cbegin(); it != regex_removes.cend(); it++) {
|
||||
lyrics.remove(*it);
|
||||
}
|
||||
static const QRegularExpression regex_html_tag_a(u"<a [^>]*>[^<]*</a>"_s);
|
||||
static const QRegularExpression regex_html_tag_script(u"<script>[^>]*</script>"_s);
|
||||
static const QRegularExpression regex_html_tag_div(u"<div [^>]*>×</div>"_s);
|
||||
static const QRegularExpression regex_html_tag_br(u"<br[^>]*>"_s);
|
||||
static const QRegularExpression regex_html_tag_p_close(u"</p>"_s);
|
||||
static const QRegularExpression regex_html_tags(u"<[^>]*>"_s);
|
||||
lyrics.append(content.mid(start_lyrics_idx, end_lyrics_idx - start_lyrics_idx)
|
||||
.remove(u'\r')
|
||||
.remove(u'\n')
|
||||
.remove(regex_html_tag_a)
|
||||
static const QRegularExpression regex_newlines_squash(u"\\n{3,}"_s);
|
||||
lyrics.remove(regex_html_tag_a)
|
||||
.remove(regex_html_tag_script)
|
||||
.remove(regex_html_tag_div)
|
||||
.replace(regex_html_tag_br, u"\n"_s)
|
||||
.replace(regex_html_tag_p_close, u"\n\n"_s)
|
||||
.remove(regex_html_tags)
|
||||
.trimmed());
|
||||
}
|
||||
else {
|
||||
start_idx = -1;
|
||||
}
|
||||
|
||||
}
|
||||
while (start_idx > 0 && multiple);
|
||||
.replace(regex_newlines_squash, u"\n\n"_s);
|
||||
lyrics = lyrics.trimmed();
|
||||
|
||||
if (lyrics.length() > 6000 || lyrics.contains("there are no lyrics to"_L1, Qt::CaseInsensitive)) {
|
||||
return QString();
|
||||
|
||||
@@ -41,7 +41,7 @@ class HtmlLyricsProvider : public LyricsProvider {
|
||||
|
||||
virtual bool StartSearchAsync(const int id, const LyricsSearchRequest &request) override;
|
||||
|
||||
static QString ParseLyricsFromHTML(const QString &content, const QRegularExpression &start_tag, const QRegularExpression &end_tag, const QRegularExpression &lyrics_start, const bool multiple);
|
||||
static QString ParseLyricsFromHTML(const QString &content, const QRegularExpression &start_tag, const QRegularExpression &end_tag, const QRegularExpression &lyrics_start, const bool multiple, const QList<QRegularExpression> &regex_removes = {});
|
||||
|
||||
protected:
|
||||
virtual QUrl Url(const LyricsSearchRequest &request) = 0;
|
||||
|
||||
Reference in New Issue
Block a user