Switch to FTS5 with unicode61 (#229)

* Switch to FTS5 with unicode61

* Update required sqlite version in README

* Update README

* Change back db file
This commit is contained in:
Jonas Kvinge
2019-07-30 22:45:22 +02:00
committed by GitHub
parent 02cda47c28
commit 8a0e66bf11
10 changed files with 104 additions and 263 deletions

View File

@@ -41,6 +41,7 @@
#include <QSqlDriver>
#include <QSqlDatabase>
#include <QSqlQuery>
#include <QSqlResult>
#include <QSqlError>
#include <QStandardPaths>
#include <QtDebug>
@@ -52,177 +53,12 @@
#include "scopedtransaction.h"
const char *Database::kDatabaseFilename = "strawberry.db";
const int Database::kSchemaVersion = 8;
const int Database::kSchemaVersion = 9;
const char *Database::kMagicAllSongsTables = "%allsongstables";
int Database::sNextConnectionId = 1;
QMutex Database::sNextConnectionIdMutex;
Database::Token::Token(const QString &token, int start, int end)
: token(token), start_offset(start), end_offset(end) {}
struct sqlite3_tokenizer_module {
int iVersion;
int (*xCreate)(int argc, /* Size of argv array */
const char *const *argv, /* Tokenizer argument strings */
sqlite3_tokenizer** ppTokenizer); /* OUT: Created tokenizer */
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
int (*xOpen)(
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
const char *pInput, int nBytes, /* Input buffer */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
);
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
int (*xNext)(
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
const char* *ppToken, int *pnBytes, /* OUT: Normalized text for token */
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition); /* OUT: Number of tokens returned before this one */
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
sqlite3_tokenizer_module *Database::sFTSTokenizer = nullptr;
int Database::FTSCreate(int argc, const char *const *argv, sqlite3_tokenizer **tokenizer) {
*tokenizer = reinterpret_cast<sqlite3_tokenizer*>(new UnicodeTokenizer);
return SQLITE_OK;
}
int Database::FTSDestroy(sqlite3_tokenizer *tokenizer) {
UnicodeTokenizer *real_tokenizer = reinterpret_cast<UnicodeTokenizer*>(tokenizer);
delete real_tokenizer;
return SQLITE_OK;
}
int Database::FTSOpen(sqlite3_tokenizer *pTokenizer, const char *input, int bytes, sqlite3_tokenizer_cursor **cursor) {
UnicodeTokenizerCursor *new_cursor = new UnicodeTokenizerCursor;
new_cursor->pTokenizer = pTokenizer;
new_cursor->position = 0;
QString str = QString::fromUtf8(input, bytes).toLower();
QChar *data = str.data();
// Decompose and strip punctuation.
QList<Token> tokens;
QString token;
int start_offset = 0;
int offset = 0;
for (int i = 0; i < str.length(); ++i) {
QChar c = data[i];
ushort unicode = c.unicode();
if (unicode <= 0x007f) {
offset += 1;
}
else if (unicode >= 0x0080 && unicode <= 0x07ff) {
offset += 2;
}
else if (unicode >= 0x0800) {
offset += 3;
}
// Unicode astral planes unsupported in Qt?
/*else if (unicode >= 0x010000 && unicode <= 0x10ffff) {
offset += 4;
}*/
if (!data[i].isLetterOrNumber()) {
// Token finished.
if (token.length() != 0) {
tokens << Token(token, start_offset, offset - 1);
start_offset = offset;
token.clear();
}
else {
++start_offset;
}
}
else {
if (data[i].decompositionTag() != QChar::NoDecomposition) {
token.push_back(data[i].decomposition()[0]);
} else {
token.push_back(data[i]);
}
}
if (i == str.length() - 1) {
if (token.length() != 0) {
tokens << Token(token, start_offset, offset);
token.clear();
}
}
}
new_cursor->tokens = tokens;
*cursor = reinterpret_cast<sqlite3_tokenizer_cursor*>(new_cursor);
return SQLITE_OK;
}
int Database::FTSClose(sqlite3_tokenizer_cursor *cursor) {
UnicodeTokenizerCursor *real_cursor = reinterpret_cast<UnicodeTokenizerCursor*>(cursor);
delete real_cursor;
return SQLITE_OK;
}
int Database::FTSNext(sqlite3_tokenizer_cursor *cursor, const char* *token, int *bytes, int *start_offset, int *end_offset, int *position) {
UnicodeTokenizerCursor *real_cursor = reinterpret_cast<UnicodeTokenizerCursor*>(cursor);
QList<Token> tokens = real_cursor->tokens;
if (real_cursor->position >= tokens.size()) {
return SQLITE_DONE;
}
Token t = tokens[real_cursor->position];
QByteArray utf8 = t.token.toUtf8();
*token = utf8.constData();
*bytes = utf8.size();
*start_offset = t.start_offset;
*end_offset = t.end_offset;
*position = real_cursor->position++;
real_cursor->current_utf8 = utf8;
return SQLITE_OK;
}
void Database::StaticInit() {
if (sFTSTokenizer) return;
sFTSTokenizer = new sqlite3_tokenizer_module;
sFTSTokenizer->iVersion = 0;
sFTSTokenizer->xCreate = &Database::FTSCreate;
sFTSTokenizer->xDestroy = &Database::FTSDestroy;
sFTSTokenizer->xOpen = &Database::FTSOpen;
sFTSTokenizer->xNext = &Database::FTSNext;
sFTSTokenizer->xClose = &Database::FTSClose;
return;
}
Database::Database(Application *app, QObject *parent, const QString &database_name) :
QObject(parent),
app_(app),
@@ -254,9 +90,6 @@ Database::~Database() {
qLog(Error) << "Connection" << connection_id << "is still open!";
}
if (sFTSTokenizer)
delete sFTSTokenizer;
}
void Database::ExitAsync() {
@@ -308,14 +141,14 @@ QSqlDatabase Database::Connect() {
return db;
}
// Find Sqlite3 functions in the Qt plugin.
if (!sFTSTokenizer) StaticInit();
{
if (db.tables().count() == 0) {
// Set up initial schema
qLog(Info) << "Creating initial database schema";
UpdateDatabaseSchema(0, db);
}
else if (SchemaVersion(&db) <= 8) { // Register unicode from unicode61 tokenizer to drop old FTS3 tables.
#ifdef SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER
// In case sqlite>=3.12 is compiled without -DSQLITE_ENABLE_FTS3_TOKENIZER
// (generally a good idea due to security reasons) the fts3 support should be enabled explicitly.
QVariant v = db.driver()->handle();
if (v.isValid() && qstrcmp(v.typeName(), "sqlite3*") == 0) {
sqlite3 *handle = *static_cast<sqlite3**>(v.data());
@@ -326,20 +159,21 @@ QSqlDatabase Database::Connect() {
else qLog(Fatal) << "Unable to enable FTS3 tokenizer";
}
#endif
QSqlQuery set_fts_tokenizer(db);
set_fts_tokenizer.prepare("SELECT fts3_tokenizer(:name, :pointer)");
set_fts_tokenizer.bindValue(":name", "unicode");
set_fts_tokenizer.bindValue(":pointer", QByteArray(reinterpret_cast<const char*>(&sFTSTokenizer), sizeof(&sFTSTokenizer)));
if (!set_fts_tokenizer.exec()) {
qLog(Warning) << "Couldn't register FTS3 tokenizer : " << set_fts_tokenizer.lastError();
QSqlQuery get_fts_tokenizer(db);
get_fts_tokenizer.prepare("SELECT fts3_tokenizer(:name)");
get_fts_tokenizer.bindValue(":name", "unicode61");
if (get_fts_tokenizer.exec() && get_fts_tokenizer.next()) {
QSqlQuery set_fts_tokenizer(db);
set_fts_tokenizer.prepare("SELECT fts3_tokenizer(:name, :pointer)");
set_fts_tokenizer.bindValue(":name", "unicode");
set_fts_tokenizer.bindValue(":pointer", get_fts_tokenizer.value(0));
if (!set_fts_tokenizer.exec()) {
qLog(Warning) << "Couldn't register FTS3 tokenizer : " << set_fts_tokenizer.lastError();
}
}
else {
qLog(Warning) << "Couldn't get FTS3 tokenizer : " << get_fts_tokenizer.lastError();
}
// Implicit invocation of ~QSqlQuery() when leaving the scope to release any remaining database locks!
}
if (db.tables().count() == 0) {
// Set up initial schema
qLog(Info) << "Creating initial database schema";
UpdateDatabaseSchema(0, db);
}
// Attach external databases
@@ -399,7 +233,7 @@ void Database::Close() {
}
void Database::UpdateMainSchema(QSqlDatabase *db) {
int Database::SchemaVersion(QSqlDatabase *db) {
// Get the database's schema version
int schema_version = 0;
@@ -408,7 +242,13 @@ void Database::UpdateMainSchema(QSqlDatabase *db) {
if (q.next()) schema_version = q.value(0).toInt();
// Implicit invocation of ~QSqlQuery() when leaving the scope to release any remaining database locks!
}
return schema_version;
}
void Database::UpdateMainSchema(QSqlDatabase *db) {
int schema_version = SchemaVersion(db);
startup_schema_version_ = schema_version;
if (schema_version > kSchemaVersion) {
@@ -591,6 +431,7 @@ void Database::ExecSongTablesCommands(QSqlDatabase &db, const QStringList &song_
if (CheckErrors(query)) qFatal("Unable to update music collection database");
}
}
}
QStringList Database::SongsTables(QSqlDatabase &db, int schema_version) const {
@@ -745,4 +586,3 @@ void Database::BackupFile(const QString &filename) {
sqlite3_backup_finish(backup);
}