Add slow SQL query warning (#27545)

- Databases are one of the most important parts of Forgejo, every
interaction uses the database in one way or another. Therefore, it is
important to maintain the database and recognize when the server is not
doing well with the database. There already is the option to log *every*
SQL query along with its execution time, but monitoring becomes
impractical for larger instances and takes up unnecessary storage in the
logs.
- Add a QoL enhancement that allows instance administrators to specify a
threshold value beyond which query execution time is logged as a warning
in the xorm logger. The default value is a conservative five seconds to
avoid this becoming a source of spam in the logs.
- The use case for this patch is that with an instance the size of
Codeberg, monitoring SQL logs is not very fruitful and most of them are
uninteresting. Recently, in the context of persistent deadlock issues
(https://codeberg.org/forgejo/forgejo/issues/220), I have noticed that
certain queries hold locks on tables like comment and issue for several
seconds. This patch helps to identify which queries these are and when
they happen.
- Added unit test.

(cherry picked from commit 9cf501f1af4cd870221cef6af489618785b71186)

---------

Co-authored-by: Gusted <postmaster@gusted.xyz>
Co-authored-by: Giteabot <teabot@gitea.io>
Co-authored-by: 6543 <6543@obermui.de>
This commit is contained in:
Earl Warren 2024-02-23 01:57:24 +01:00 committed by GitHub
parent 532e422027
commit b748d62b46
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 58 additions and 20 deletions

View File

@ -412,6 +412,10 @@ USER = root
;; ;;
;; Whether execute database models migrations automatically ;; Whether execute database models migrations automatically
;AUTO_MIGRATION = true ;AUTO_MIGRATION = true
;;
;; Threshold value (in seconds) beyond which query execution time is logged as a warning in the xorm logger
;;
;SLOW_QUERY_THRESHOLD = 5s
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@ -458,6 +458,7 @@ The following configuration set `Content-Type: application/vnd.android.package-a
- `MAX_IDLE_CONNS` **2**: Max idle database connections on connection pool, default is 2 - this will be capped to `MAX_OPEN_CONNS`. - `MAX_IDLE_CONNS` **2**: Max idle database connections on connection pool, default is 2 - this will be capped to `MAX_OPEN_CONNS`.
- `CONN_MAX_LIFETIME` **0 or 3s**: Sets the maximum amount of time a DB connection may be reused - default is 0, meaning there is no limit (except on MySQL where it is 3s - see #6804 & #7071). - `CONN_MAX_LIFETIME` **0 or 3s**: Sets the maximum amount of time a DB connection may be reused - default is 0, meaning there is no limit (except on MySQL where it is 3s - see #6804 & #7071).
- `AUTO_MIGRATION` **true**: Whether execute database models migrations automatically. - `AUTO_MIGRATION` **true**: Whether execute database models migrations automatically.
- `SLOW_QUERY_THRESHOLD` **5s**: Threshold value in seconds beyond which query execution time is logged as a warning in the xorm logger.
[^1]: It may be necessary to specify a hostport even when listening on a unix socket, as the port is part of the socket name. see [#24552](https://github.com/go-gitea/gitea/issues/24552#issuecomment-1681649367) for additional details. [^1]: It may be necessary to specify a hostport even when listening on a unix socket, as the port is part of the socket name. see [#24552](https://github.com/go-gitea/gitea/issues/24552#issuecomment-1681649367) for additional details.

View File

@ -11,10 +11,13 @@ import (
"io" "io"
"reflect" "reflect"
"strings" "strings"
"time"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"xorm.io/xorm" "xorm.io/xorm"
"xorm.io/xorm/contexts"
"xorm.io/xorm/names" "xorm.io/xorm/names"
"xorm.io/xorm/schemas" "xorm.io/xorm/schemas"
@ -143,6 +146,13 @@ func InitEngine(ctx context.Context) error {
xormEngine.SetConnMaxLifetime(setting.Database.ConnMaxLifetime) xormEngine.SetConnMaxLifetime(setting.Database.ConnMaxLifetime)
xormEngine.SetDefaultContext(ctx) xormEngine.SetDefaultContext(ctx)
if setting.Database.SlowQueryThreshold > 0 {
xormEngine.AddHook(&SlowQueryHook{
Threshold: setting.Database.SlowQueryThreshold,
Logger: log.GetLogger("xorm"),
})
}
SetDefaultEngine(ctx, xormEngine) SetDefaultEngine(ctx, xormEngine)
return nil return nil
} }
@ -298,3 +308,24 @@ func SetLogSQL(ctx context.Context, on bool) {
sess.Engine().ShowSQL(on) sess.Engine().ShowSQL(on)
} }
} }
type SlowQueryHook struct {
Threshold time.Duration
Logger log.Logger
}
var _ contexts.Hook = &SlowQueryHook{}
func (SlowQueryHook) BeforeProcess(c *contexts.ContextHook) (context.Context, error) {
return c.Ctx, nil
}
func (h *SlowQueryHook) AfterProcess(c *contexts.ContextHook) error {
if c.ExecuteTime >= h.Threshold {
// 8 is the amount of skips passed to runtime.Caller, so that in the log the correct function
// is being displayed (the function that ultimately wants to execute the query in the code)
// instead of the function of the slow query hook being called.
h.Logger.Log(8, log.WARN, "[Slow SQL Query] %s %v - %v", c.SQL, c.Args, c.ExecuteTime)
}
return nil
}

View File

@ -25,26 +25,27 @@ var (
// Database holds the database settings // Database holds the database settings
Database = struct { Database = struct {
Type DatabaseType Type DatabaseType
Host string Host string
Name string Name string
User string User string
Passwd string Passwd string
Schema string Schema string
SSLMode string SSLMode string
Path string Path string
LogSQL bool LogSQL bool
MysqlCharset string MysqlCharset string
CharsetCollation string CharsetCollation string
Timeout int // seconds Timeout int // seconds
SQLiteJournalMode string SQLiteJournalMode string
DBConnectRetries int DBConnectRetries int
DBConnectBackoff time.Duration DBConnectBackoff time.Duration
MaxIdleConns int MaxIdleConns int
MaxOpenConns int MaxOpenConns int
ConnMaxLifetime time.Duration ConnMaxLifetime time.Duration
IterateBufferSize int IterateBufferSize int
AutoMigration bool AutoMigration bool
SlowQueryThreshold time.Duration
}{ }{
Timeout: 500, Timeout: 500,
IterateBufferSize: 50, IterateBufferSize: 50,
@ -87,6 +88,7 @@ func loadDBSetting(rootCfg ConfigProvider) {
Database.DBConnectRetries = sec.Key("DB_RETRIES").MustInt(10) Database.DBConnectRetries = sec.Key("DB_RETRIES").MustInt(10)
Database.DBConnectBackoff = sec.Key("DB_RETRY_BACKOFF").MustDuration(3 * time.Second) Database.DBConnectBackoff = sec.Key("DB_RETRY_BACKOFF").MustDuration(3 * time.Second)
Database.AutoMigration = sec.Key("AUTO_MIGRATION").MustBool(true) Database.AutoMigration = sec.Key("AUTO_MIGRATION").MustBool(true)
Database.SlowQueryThreshold = sec.Key("SLOW_QUERY_THRESHOLD").MustDuration(5 * time.Second)
} }
// DBConnStr returns database connection string // DBConnStr returns database connection string