mithril_persistence/sqlite/
cleaner.rs

1use std::collections::BTreeSet;
2
3use slog::{Logger, debug};
4
5use mithril_common::StdResult;
6use mithril_common::logging::LoggerExtensions;
7
8use crate::sqlite::SqliteConnection;
9
10/// Tasks that can be performed by the SqliteCleaner
11///
12/// Important: For some tasks order matters, Vacuum must be executed before the wal checkpoint else
13/// the database main file will not shrink as the changes are yet to be written to the main file.
14#[derive(Eq, PartialEq, Copy, Clone, Ord, PartialOrd)]
15pub enum SqliteCleaningTask {
16    /// Reconstruct the database file, repacking it into a minimal amount of disk space.
17    ///
18    /// see: <https://www.sqlite.org/lang_vacuum.html>
19    ///
20    /// ⚠ This operation can be very slow on large databases ⚠
21    Vacuum,
22    /// Run a checkpoint to transfer the data from the WAL file to the main db file and truncate
23    /// it afterward.
24    ///
25    /// see: <https://www.sqlite.org/pragma.html#pragma_wal_checkpoint>
26    WalCheckpointTruncate,
27    /// Attempt to optimize the database, running 'ANALYZE' depending on the mode.
28    ///
29    /// see: <https://sqlite.org/pragma.html#pragma_optimize>
30    Optimize(OptimizeMode),
31}
32
33/// Mode when running database optimization.
34#[derive(Eq, PartialEq, Copy, Clone, Ord, PartialOrd)]
35pub enum OptimizeMode {
36    /// Optimize with the default flags: on recent table only and with an analysis limit to prevent
37    /// excess run-time (recommended by SQLite before closing a short-lived connection)
38    Default,
39    /// Optimize all tables without an analysis limit (recommended by SQLite when opening a
40    /// long-lived database connection)
41    AllTables,
42}
43
44impl SqliteCleaningTask {
45    /// Get the log message for the task.
46    pub fn log_message(self: SqliteCleaningTask) -> &'static str {
47        match self {
48            SqliteCleaningTask::Vacuum => "Running `vacuum` on the SQLite database",
49            SqliteCleaningTask::WalCheckpointTruncate => {
50                "Running `wal_checkpoint(TRUNCATE)` on the SQLite database"
51            }
52            SqliteCleaningTask::Optimize(OptimizeMode::Default) => {
53                "Running `pragma optimize;` on the SQLite database"
54            }
55            SqliteCleaningTask::Optimize(OptimizeMode::AllTables) => {
56                "Running `pragma optimize=0x10002;` on the SQLite database"
57            }
58        }
59    }
60}
61
62/// The SqliteCleaner is responsible for cleaning up databases by performing tasks defined
63/// in [SqliteCleaningTask].
64pub struct SqliteCleaner<'a> {
65    connection: &'a SqliteConnection,
66    logger: Logger,
67    tasks: BTreeSet<SqliteCleaningTask>,
68}
69
70impl<'a> SqliteCleaner<'a> {
71    /// Create a new instance of the `SqliteCleaner`.
72    pub fn new(connection: &'a SqliteConnection) -> Self {
73        Self {
74            connection,
75            logger: Logger::root(slog::Discard, slog::o!()),
76            tasks: BTreeSet::new(),
77        }
78    }
79
80    /// Set the logger to be used by the cleaner.
81    pub fn with_logger(mut self, logger: Logger) -> Self {
82        self.logger = logger.new_with_component_name::<Self>();
83        self
84    }
85
86    /// Set the [SqliteCleaningTask] to be performed by the cleaner.
87    ///
88    /// **Important** Tasks are deduplicated and reordered based on a [BTreeSet] meaning:
89    /// - The order of the variants of the [SqliteCleaningTask] enum is used: [SqliteCleaningTask::Vacuum]
90    ///   will be performed before [SqliteCleaningTask::WalCheckpointTruncate] even if they are given in the reverse order.
91    /// - if the cleaner already had one of the given tasks, the new one will be ignored.
92    /// - if a task is twice in the given tasks, only the first occurrence will be kept.
93    pub fn with_tasks(mut self, tasks: &[SqliteCleaningTask]) -> Self {
94        for task in tasks {
95            self.tasks.insert(*task);
96        }
97        self
98    }
99
100    /// Run the database optimization on the given connection.
101    ///
102    /// Shortcut for
103    /// ```
104    /// # use mithril_persistence::sqlite::{SqliteCleaner, SqliteCleaningTask, OptimizeMode, ConnectionBuilder};
105    /// #
106    /// # fn main() -> mithril_common::StdResult<()> {
107    /// #   let connection = ConnectionBuilder::open_memory().build().unwrap();
108    /// #   let mode = OptimizeMode::Default;
109    /// #
110    /// SqliteCleaner::new(&connection).with_tasks(&[SqliteCleaningTask::Optimize(mode)]).run()?;
111    /// #   Ok(())
112    /// # }
113    /// ```
114    pub fn optimize(connection: &'a SqliteConnection, mode: OptimizeMode) -> StdResult<()> {
115        SqliteCleaner::new(connection)
116            .with_tasks(&[SqliteCleaningTask::Optimize(mode)])
117            .run()
118    }
119
120    /// Cleanup the database by performing the defined tasks.
121    ///
122    /// **Important**: Tasks running order is based on the order of the variants of the [SqliteCleaningTask] enum.
123    pub fn run(self) -> StdResult<()> {
124        for task in &self.tasks {
125            debug!(self.logger, "{}", task.log_message());
126
127            match task {
128                SqliteCleaningTask::Vacuum => {
129                    self.connection.execute("vacuum")?;
130                }
131                SqliteCleaningTask::WalCheckpointTruncate => {
132                    self.connection.execute("PRAGMA wal_checkpoint(TRUNCATE);")?;
133                }
134                SqliteCleaningTask::Optimize(OptimizeMode::Default) => {
135                    self.connection.execute("PRAGMA optimize;")?;
136                }
137                SqliteCleaningTask::Optimize(OptimizeMode::AllTables) => {
138                    self.connection.execute("PRAGMA optimize=0x10002;")?;
139                }
140            }
141        }
142
143        // Run a minimal WAL checkpoint, note: running a checkpoint when the WAL is disabled is harmless.
144        if !self.tasks.contains(&SqliteCleaningTask::WalCheckpointTruncate) {
145            self.connection.execute("PRAGMA wal_checkpoint(PASSIVE);")?;
146        }
147
148        Ok(())
149    }
150}
151
152#[cfg(test)]
153mod tests {
154    use std::ops::Range;
155    use std::path::Path;
156
157    use mithril_common::current_function;
158    use mithril_common::test::TempDir;
159
160    use crate::sqlite::{ConnectionBuilder, ConnectionOptions, SqliteConnection};
161
162    use super::*;
163
164    fn add_test_table(connection: &SqliteConnection) {
165        connection
166            .execute("CREATE TABLE IF NOT EXISTS test (id INTEGER PRIMARY KEY, text TEXT);")
167            .unwrap();
168    }
169
170    fn fill_test_table(connection: &SqliteConnection, ids: Range<u64>) {
171        connection
172            .execute(format!(
173                "INSERT INTO test (id, text) VALUES {}",
174                ids.map(|i| format!("({i}, 'some text to fill the db')"))
175                    .collect::<Vec<String>>()
176                    .join(", ")
177            ))
178            .unwrap();
179    }
180
181    fn delete_test_rows(connection: &SqliteConnection, ids: Range<u64>) {
182        connection
183            .execute(format!(
184                "DELETE FROM test WHERE id >= {} and id < {}",
185                ids.start, ids.end
186            ))
187            .unwrap();
188    }
189
190    /// Apply migrations, disable auto_vacuum and mangle the database to create some free pages
191    /// for the vacuum to reclaim
192    fn prepare_db_for_vacuum(connection: &SqliteConnection) {
193        // Disable Auto vacuum to allow the test to check if the vacuum was run
194        connection.execute("pragma auto_vacuum = none; vacuum;").unwrap();
195        add_test_table(connection);
196        fill_test_table(connection, 0..10_000);
197        // Checkpoint before deletion so entries are transferred from the WAL file to the main db
198        connection.execute("PRAGMA wal_checkpoint(PASSIVE)").unwrap();
199        delete_test_rows(connection, 0..5_000);
200        // Checkpoint after deletion to create free pages in the main db
201        connection.execute("PRAGMA wal_checkpoint(PASSIVE)").unwrap();
202    }
203
204    fn file_size(path: &Path) -> u64 {
205        path.metadata()
206            .unwrap_or_else(|_| panic!("Failed to read len of '{}'", path.display()))
207            .len()
208    }
209
210    fn tables_to_analyze(connection: &SqliteConnection) -> Vec<String> {
211        // This pragma lists the ANALYZE queries that would be performed, one row per table, i.e. `ANALYZE "main"."table"`
212        connection
213            .prepare("PRAGMA optimize(-1);")
214            .unwrap()
215            .iter()
216            .map(|row| row.unwrap().read::<&str, _>(0).to_string())
217            .collect()
218    }
219
220    #[test]
221    fn cleanup_empty_in_memory_db_should_not_crash() {
222        let connection = ConnectionBuilder::open_memory().build().unwrap();
223
224        SqliteCleaner::new(&connection)
225            .with_tasks(&[SqliteCleaningTask::Vacuum])
226            .run()
227            .expect("Vacuum should not fail");
228        SqliteCleaner::new(&connection)
229            .with_tasks(&[SqliteCleaningTask::WalCheckpointTruncate])
230            .run()
231            .expect("WalCheckpointTruncate should not fail");
232    }
233
234    #[test]
235    fn cleanup_empty_file_without_wal_db_should_not_crash() {
236        let db_path = TempDir::create("sqlite_cleaner", current_function!()).join("test.db");
237        let connection = ConnectionBuilder::open_file(&db_path).build().unwrap();
238
239        SqliteCleaner::new(&connection)
240            .with_tasks(&[SqliteCleaningTask::Vacuum])
241            .run()
242            .expect("Vacuum should not fail");
243        SqliteCleaner::new(&connection)
244            .with_tasks(&[SqliteCleaningTask::WalCheckpointTruncate])
245            .run()
246            .expect("WalCheckpointTruncate should not fail");
247    }
248
249    #[test]
250    fn test_vacuum() {
251        let db_dir = TempDir::create("sqlite_cleaner", current_function!());
252        let (db_path, db_wal_path) = (db_dir.join("test.db"), db_dir.join("test.db-wal"));
253        let connection = ConnectionBuilder::open_file(&db_path)
254            .with_options(&[ConnectionOptions::EnableWriteAheadLog])
255            .build()
256            .unwrap();
257        prepare_db_for_vacuum(&connection);
258
259        let db_initial_size = file_size(&db_path);
260        assert!(db_initial_size > 0);
261
262        SqliteCleaner::new(&connection)
263            .with_tasks(&[SqliteCleaningTask::Vacuum])
264            .run()
265            .unwrap();
266
267        let db_after_vacuum_size = file_size(&db_path);
268
269        assert!(
270            db_initial_size > db_after_vacuum_size,
271            "db size should have decreased (vacuum enabled)"
272        );
273        assert!(
274            file_size(&db_wal_path) > 0,
275            "db wal file should not have been truncated (truncate disabled)"
276        );
277    }
278
279    #[test]
280    fn test_truncate_wal() {
281        let db_dir = TempDir::create("sqlite_cleaner", current_function!());
282        let (db_path, db_wal_path) = (db_dir.join("test.db"), db_dir.join("test.db-wal"));
283        let connection = ConnectionBuilder::open_file(&db_path)
284            .with_options(&[ConnectionOptions::EnableWriteAheadLog])
285            .build()
286            .unwrap();
287
288        // Make "neutral" changes to the db, this will fill the WAL files with some data
289        // but won't change the db size after cleaning up.
290        add_test_table(&connection);
291        fill_test_table(&connection, 0..10_000);
292        delete_test_rows(&connection, 0..10_000);
293
294        assert!(file_size(&db_wal_path) > 0);
295
296        SqliteCleaner::new(&connection)
297            .with_tasks(&[SqliteCleaningTask::WalCheckpointTruncate])
298            .run()
299            .unwrap();
300
301        assert_eq!(
302            file_size(&db_wal_path),
303            0,
304            "db wal file should have been truncated"
305        );
306    }
307
308    #[test]
309    fn test_vacuum_truncate_wal() {
310        let db_dir = TempDir::create("sqlite_cleaner", current_function!());
311        let (db_path, db_wal_path) = (db_dir.join("test.db"), db_dir.join("test.db-wal"));
312        let connection = ConnectionBuilder::open_file(&db_path)
313            .with_options(&[ConnectionOptions::EnableWriteAheadLog])
314            .build()
315            .unwrap();
316
317        // Make "neutral" changes to the db, this will fill the WAL files with some data
318        // but won't change the db size after cleaning up.
319        add_test_table(&connection);
320        fill_test_table(&connection, 0..10_000);
321        delete_test_rows(&connection, 0..10_000);
322
323        assert!(file_size(&db_wal_path) > 0);
324
325        SqliteCleaner::new(&connection)
326            .with_tasks(&[SqliteCleaningTask::WalCheckpointTruncate, SqliteCleaningTask::Vacuum])
327            .run()
328            .unwrap();
329
330        assert_eq!(
331            file_size(&db_wal_path),
332            0,
333            "db wal file should have been truncated"
334        );
335    }
336
337    #[test]
338    fn test_optimize_default() {
339        let db_path = TempDir::create("sqlite_cleaner", current_function!()).join("test.db");
340        let connection = ConnectionBuilder::open_file(&db_path).build().unwrap();
341
342        add_test_table(&connection);
343        // Without an index the "test" table is always marked as a candidate for ANALYZE
344        connection.execute("create index test_text on test(text)").unwrap();
345        // One of the potential conditions to trigger ANALYZE is when a table "has increased or decreased by 10-fold"
346        fill_test_table(&connection, 0..10_000);
347
348        let analyze_candidate = tables_to_analyze(&connection);
349        assert!(!analyze_candidate.is_empty());
350
351        SqliteCleaner::optimize(&connection, OptimizeMode::Default).unwrap();
352
353        assert_eq!(Vec::<String>::new(), tables_to_analyze(&connection));
354    }
355
356    #[test]
357    fn test_optimize_all_tables() {
358        let db_path = TempDir::create("sqlite_cleaner", current_function!()).join("test.db");
359        let connection = ConnectionBuilder::open_file(&db_path).build().unwrap();
360
361        add_test_table(&connection);
362        // Without an index the "test" table is always marked as a candidate for ANALYZE
363        connection.execute("create index test_text on test(text)").unwrap();
364        // One of the potential conditions to trigger ANALYZE is when a table "has increased or decreased by 10-fold"
365        fill_test_table(&connection, 0..10_000);
366
367        let analyze_candidate = tables_to_analyze(&connection);
368        assert!(!analyze_candidate.is_empty());
369
370        SqliteCleaner::optimize(&connection, OptimizeMode::AllTables).unwrap();
371
372        assert_eq!(Vec::<String>::new(), tables_to_analyze(&connection));
373    }
374}