mithril_aggregator/services/
upkeep.rs

1//! ## Upkeep Service
2//!
3//! This service is responsible for the upkeep of the application.
4//!
5//! It is in charge of the following tasks:
//! * prune stale data by executing the registered epoch pruning tasks
//! * free up space by executing vacuum and WAL checkpoint on the database
7
8use std::sync::Arc;
9
10use anyhow::Context;
11use async_trait::async_trait;
12use mithril_common::StdResult;
13use mithril_common::entities::Epoch;
14use mithril_common::logging::LoggerExtensions;
15use mithril_persistence::sqlite::{
16    SqliteCleaner, SqliteCleaningTask, SqliteConnection, SqliteConnectionPool,
17};
18use mithril_signed_entity_lock::SignedEntityTypeLock;
19use slog::{Logger, info};
20
/// Define the service responsible for the upkeep of the application.
///
/// Two independent entry points are exposed: [`run`](UpkeepService::run) for the
/// epoch-driven upkeep and [`vacuum`](UpkeepService::vacuum) for the database vacuum.
/// With the `test` cfg a mock is generated through `mockall::automock`.
#[cfg_attr(test, mockall::automock)]
#[async_trait]
pub trait UpkeepService: Send + Sync {
    /// Run the upkeep service.
    ///
    /// `epoch` is the epoch handed over to the upkeep; the aggregator implementation in
    /// this file forwards it to its pruning tasks as the current epoch.
    async fn run(&self, epoch: Epoch) -> StdResult<()>;

    /// Vacuum database.
    ///
    /// This is separate from [`run`](UpkeepService::run): the aggregator implementation in
    /// this file does not vacuum as part of `run`.
    async fn vacuum(&self) -> StdResult<()>;
}
31
/// Define the task responsible for pruning a datasource below a certain epoch threshold.
///
/// The threshold itself is not part of this interface: implementors only receive the
/// current epoch and decide what is stale from it.
/// With the `test` cfg a mock is generated through `mockall::automock`.
#[cfg_attr(test, mockall::automock)]
#[async_trait]
pub trait EpochPruningTask: Send + Sync {
    /// Get the name of the data that will be pruned.
    ///
    /// The aggregator upkeep service in this file only uses it as the `pruned_data`
    /// value of its log entries.
    fn pruned_data(&self) -> &'static str;

    /// Prune the datasource based on the given current epoch.
    async fn prune(&self, current_epoch: Epoch) -> StdResult<()>;
}
42
/// Implementation of the upkeep service for the aggregator.
///
/// It works on the database handles received at construction: a shared connection to the
/// main database, a connection pool for the Cardano transactions database and a shared
/// connection to the event store database.
///
/// NOTE(review): this comment previously stated that the service "creates new connections
/// itself instead of relying on a connection pool or a shared connection". The fields
/// below are `Arc`-shared connections and a pool, so that statement looked stale and was
/// reworded — confirm with the connection cleanup requirements.
pub struct AggregatorUpkeepService {
    // Shared connection to the main database (WAL checkpoint on upkeep, and vacuum).
    main_db_connection: Arc<SqliteConnection>,
    // Pool from which a connection to the Cardano transactions database is taken on upkeep.
    cardano_tx_connection_pool: Arc<SqliteConnectionPool>,
    // Shared connection to the event store database (WAL checkpoint on upkeep).
    event_store_connection: Arc<SqliteConnection>,
    // Database upkeep and vacuum are skipped while this lock reports locked entities.
    signed_entity_type_lock: Arc<SignedEntityTypeLock>,
    // Pruning tasks executed, in order, at the start of each upkeep run.
    pruning_tasks: Vec<Arc<dyn EpochPruningTask>>,
    logger: Logger,
}
55
56impl AggregatorUpkeepService {
57    /// Create a new instance of the aggregator upkeep service.
58    pub fn new(
59        main_db_connection: Arc<SqliteConnection>,
60        cardano_tx_connection_pool: Arc<SqliteConnectionPool>,
61        event_store_connection: Arc<SqliteConnection>,
62        signed_entity_type_lock: Arc<SignedEntityTypeLock>,
63        pruning_tasks: Vec<Arc<dyn EpochPruningTask>>,
64        logger: Logger,
65    ) -> Self {
66        Self {
67            main_db_connection,
68            cardano_tx_connection_pool,
69            event_store_connection,
70            signed_entity_type_lock,
71            pruning_tasks,
72            logger: logger.new_with_component_name::<Self>(),
73        }
74    }
75
76    async fn execute_pruning_tasks(&self, current_epoch: Epoch) -> StdResult<()> {
77        for task in &self.pruning_tasks {
78            info!(
79                self.logger, "Pruning stale data";
80                "pruned_data" => task.pruned_data(), "current_epoch" => ?current_epoch
81            );
82            task.prune(current_epoch).await?;
83        }
84
85        Ok(())
86    }
87
88    async fn upkeep_all_databases(&self) -> StdResult<()> {
89        if self.signed_entity_type_lock.has_locked_entities().await {
90            info!(
91                self.logger,
92                "Some entities are locked - Skipping database upkeep"
93            );
94            return Ok(());
95        }
96
97        let main_db_connection = self.main_db_connection.clone();
98        let cardano_tx_db_connection_pool = self.cardano_tx_connection_pool.clone();
99        let event_store_connection = self.event_store_connection.clone();
100        let db_upkeep_logger = self.logger.clone();
101
102        // Run the database upkeep tasks in another thread to avoid blocking the tokio runtime
103        let db_upkeep_thread = tokio::task::spawn_blocking(move || -> StdResult<()> {
104            info!(db_upkeep_logger, "Cleaning main database");
105            SqliteCleaner::new(&main_db_connection)
106                .with_logger(db_upkeep_logger.clone())
107                .with_tasks(&[SqliteCleaningTask::WalCheckpointTruncate])
108                .run()?;
109
110            info!(db_upkeep_logger, "Cleaning cardano transactions database");
111            let cardano_tx_db_connection = cardano_tx_db_connection_pool.connection()?;
112            SqliteCleaner::new(&cardano_tx_db_connection)
113                .with_logger(db_upkeep_logger.clone())
114                .with_tasks(&[SqliteCleaningTask::WalCheckpointTruncate])
115                .run()?;
116
117            info!(db_upkeep_logger, "Cleaning event database");
118            SqliteCleaner::new(&event_store_connection)
119                .with_logger(db_upkeep_logger.clone())
120                .with_tasks(&[SqliteCleaningTask::WalCheckpointTruncate])
121                .run()?;
122
123            Ok(())
124        });
125
126        db_upkeep_thread
127            .await
128            .with_context(|| "Database Upkeep thread crashed")?
129    }
130
131    async fn vacuum_main_database(&self) -> StdResult<()> {
132        if self.signed_entity_type_lock.has_locked_entities().await {
133            info!(
134                self.logger,
135                "Some entities are locked - Skipping main database vacuum"
136            );
137            return Ok(());
138        }
139
140        let main_db_connection = self.main_db_connection.clone();
141        let db_upkeep_logger = self.logger.clone();
142
143        // Run the database upkeep tasks in another thread to avoid blocking the tokio runtime
144        let db_upkeep_thread = tokio::task::spawn_blocking(move || -> StdResult<()> {
145            info!(db_upkeep_logger, "Vacuum main database");
146            SqliteCleaner::new(&main_db_connection)
147                .with_logger(db_upkeep_logger.clone())
148                .with_tasks(&[SqliteCleaningTask::Vacuum])
149                .run()?;
150
151            Ok(())
152        });
153
154        db_upkeep_thread
155            .await
156            .with_context(|| "Database Upkeep thread crashed")?
157    }
158}
159
160#[async_trait]
161impl UpkeepService for AggregatorUpkeepService {
162    async fn run(&self, current_epoch: Epoch) -> StdResult<()> {
163        info!(self.logger, "Start upkeep of the application");
164
165        self.execute_pruning_tasks(current_epoch)
166            .await
167            .with_context(|| "Pruning tasks failed")?;
168
169        self.upkeep_all_databases()
170            .await
171            .with_context(|| "Database upkeep failed")?;
172
173        info!(self.logger, "Upkeep finished");
174        Ok(())
175    }
176
177    async fn vacuum(&self) -> StdResult<()> {
178        info!(self.logger, "Start database vacuum");
179
180        self.vacuum_main_database()
181            .await
182            .with_context(|| "Vacuuming main database failed")?;
183
184        info!(self.logger, "Vacuum finished");
185
186        Ok(())
187    }
188}
189
#[cfg(test)]
mod tests {
    use mithril_common::{entities::SignedEntityTypeDiscriminants, temp_dir_create};
    use mockall::predicate::eq;

    use crate::database::test_helper::{
        cardano_tx_db_connection, cardano_tx_db_file_connection, main_db_connection,
        main_db_file_connection,
    };
    use crate::event_store::database::test_helper::{
        event_store_db_connection, event_store_db_file_connection,
    };
    use crate::test::TestLogger;

    use super::*;

    /// Build a mocked pruning task whose `pruned_data` is always `"mock_data"`, then let
    /// the caller add its own expectations through `mock_config`.
    fn mock_epoch_pruning_task(
        mock_config: impl FnOnce(&mut MockEpochPruningTask),
    ) -> Arc<dyn EpochPruningTask> {
        let mut task_mock = MockEpochPruningTask::new();
        task_mock.expect_pruned_data().return_const("mock_data");
        mock_config(&mut task_mock);
        Arc::new(task_mock)
    }

    /// Build an upkeep service using the test helpers' default connections, no locked
    /// entity and no pruning task.
    ///
    /// Tests override only the fields they care about using struct update syntax.
    fn default_upkeep_service() -> AggregatorUpkeepService {
        AggregatorUpkeepService::new(
            Arc::new(main_db_connection().unwrap()),
            Arc::new(SqliteConnectionPool::build(1, cardano_tx_db_connection).unwrap()),
            Arc::new(event_store_db_connection().unwrap()),
            Arc::new(SignedEntityTypeLock::default()),
            vec![],
            TestLogger::stdout(),
        )
    }

    #[tokio::test]
    async fn test_cleanup_database() {
        let (logger, log_inspector) = TestLogger::memory();
        // This test runs against file based databases created in a temporary directory
        let (main_db_path, ctx_db_path, event_store_db_path) = {
            let db_dir = temp_dir_create!();
            (
                db_dir.join("main.db"),
                db_dir.join("cardano_tx.db"),
                db_dir.join("event_store.db"),
            )
        };

        let main_db_connection = main_db_file_connection(&main_db_path).unwrap();
        let cardano_tx_connection = cardano_tx_db_file_connection(&ctx_db_path).unwrap();
        let event_store_connection = event_store_db_file_connection(&event_store_db_path).unwrap();

        let service = AggregatorUpkeepService::new(
            Arc::new(main_db_connection),
            Arc::new(SqliteConnectionPool::build_from_connection(
                cardano_tx_connection,
            )),
            Arc::new(event_store_connection),
            Arc::new(SignedEntityTypeLock::default()),
            vec![],
            logger,
        );

        service.run(Epoch(5)).await.expect("Upkeep service failed");

        assert_eq!(
            log_inspector
                .search_logs(SqliteCleaningTask::WalCheckpointTruncate.log_message())
                .len(),
            3,
            "Should have run three times since the three databases have a `WalCheckpointTruncate` cleanup"
        );
        assert!(
            log_inspector
                .search_logs(SqliteCleaningTask::Vacuum.log_message())
                .is_empty(),
            "Upkeep operation should not include Vacuum tasks"
        );
    }

    #[tokio::test]
    async fn test_doesnt_cleanup_db_if_any_entity_is_locked() {
        let (logger, log_inspector) = TestLogger::memory();

        let signed_entity_type_lock = Arc::new(SignedEntityTypeLock::default());
        signed_entity_type_lock
            .lock(SignedEntityTypeDiscriminants::CardanoTransactions)
            .await;

        let service = AggregatorUpkeepService {
            signed_entity_type_lock: signed_entity_type_lock.clone(),
            logger,
            ..default_upkeep_service()
        };
        service.run(Epoch(5)).await.expect("Upkeep service failed");

        assert!(
            log_inspector
                .search_logs(SqliteCleaningTask::WalCheckpointTruncate.log_message())
                .is_empty(),
            "No `WalCheckpointTruncate` cleanup should run while an entity is locked"
        );
    }

    #[tokio::test]
    async fn test_execute_all_pruning_tasks() {
        // Each task must be called exactly once, with the epoch given to `run`
        let task1 = mock_epoch_pruning_task(|mock| {
            mock.expect_prune().once().with(eq(Epoch(14))).returning(|_| Ok(()));
        });
        let task2 = mock_epoch_pruning_task(|mock| {
            mock.expect_prune().once().with(eq(Epoch(14))).returning(|_| Ok(()));
        });

        let service = AggregatorUpkeepService {
            pruning_tasks: vec![task1, task2],
            ..default_upkeep_service()
        };

        service.run(Epoch(14)).await.expect("Upkeep service failed");
    }

    #[tokio::test]
    async fn test_doesnt_vacuum_db_if_any_entity_is_locked() {
        let (logger, log_inspector) = TestLogger::memory();

        let signed_entity_type_lock = Arc::new(SignedEntityTypeLock::default());
        signed_entity_type_lock
            .lock(SignedEntityTypeDiscriminants::CardanoTransactions)
            .await;

        let service = AggregatorUpkeepService {
            signed_entity_type_lock: signed_entity_type_lock.clone(),
            logger,
            ..default_upkeep_service()
        };
        service.vacuum().await.expect("Vacuum failed");

        assert!(
            log_inspector
                .search_logs(SqliteCleaningTask::Vacuum.log_message())
                .is_empty(),
            "No `Vacuum` task should run while an entity is locked"
        );
    }

    #[tokio::test]
    async fn test_vacuum_database() {
        let (logger, log_inspector) = TestLogger::memory();

        // The default service has no locked entity, so the vacuum is expected to run.
        let service = AggregatorUpkeepService {
            logger,
            ..default_upkeep_service()
        };
        service.vacuum().await.expect("Vacuum failed");

        assert_eq!(
            log_inspector
                .search_logs(SqliteCleaningTask::Vacuum.log_message())
                .len(),
            1,
            "The `Vacuum` task should have run exactly once on the main database"
        );
    }
}