Introduction
The Logger
package provides a flexible and powerful
logging system for R applications. It includes a Logger
class for creating customisable loggers, as well as helper functions for
debugging and error reporting. This vignette will guide you through the
basics of using the Logger
package and demonstrate how to
leverage its features to improve your R workflows.
Installation
You can install the Logger
package from GitHub with:
# Installs the package from the GitHub repository derecksprojects/R-Logger.
# The call goes through the `remotes` package, so that must be installed first.
remotes::install_github("derecksprojects/R-Logger")
Basic Usage
First, let’s load the package and create a basic logger:
box::use(Logger[Logger, LogLevel, messageParallel])
# Create a basic logger
# (constructed with no arguments, so every setting keeps its default)
log <- Logger$new()
# Log some messages
# One call per severity. Each `#>` line is the recorded output for the call
# above it: a timestamp, the level name, and the message text.
log$info("This is an informational message")
#> 2024-08-19T01:32:48.478Z INFO This is an informational message
log$warn("This is a warning")
#> 2024-08-19T01:32:48.482Z WARNING This is a warning
log$error("This is an error")
#> 2024-08-19T01:32:48.521Z ERROR This is an error
Customising the Logger
You can customise the logger by specifying the minimum log level, output file, and custom print function:
# Create a custom logger
#   level     - minimum log level (WARNING here)
#   file_path - output file for the log entries
#   print_fn  - function used to print each entry (base `message` here)
custom_log <- Logger$new(
level = LogLevel$WARNING,
file_path = "app.log",
print_fn = message
)
# No output is recorded for the info() call: the logger's minimum level is
# WARNING.
custom_log$info("This won't be logged")
custom_log$warn("This will be logged to console and file")
#> 2024-08-19T01:32:48.730Z WARNING This will be logged to console and file
custom_log$error("This is an error message")
#> 2024-08-19T01:32:48.732Z ERROR This is an error message
Logging to a Database
The Logger
class supports logging to a
SQLite
database. Here’s how you can set it up:
box::use(RSQLite[ SQLite ])
box::use(DBI[ dbConnect, dbDisconnect, dbGetQuery ])
# Create a database connection
# (to the SQLite database file "log.sqlite")
db <- dbConnect(SQLite(), "log.sqlite")
# Create a logger that logs to the database
# Entries go to the table named by `table_name`, over the connection `db`.
db_log <- Logger$new(
db_conn = db,
table_name = "app_logs"
)
# Log some messages
db_log$info("This is logged to the database")
#> 2024-08-19T01:32:49.010Z INFO This is logged to the database
# Extra structured values are passed via `data`; the recorded output prints
# them as a JSON object under "Data:".
db_log$warn("This is a warning", data = list(code = 101))
#> 2024-08-19T01:32:49.021Z WARNING This is a warning
#> Data:
#> {
#> "code": 101
#> }
# Error details are passed via `error` and printed under "Error:".
db_log$error("An error occurred", error = "Division by zero")
#> 2024-08-19T01:32:49.135Z ERROR An error occurred
#> Error:
#> "Division by zero"
# Example of querying the logs
# Plain SQL over the same connection; only the ERROR entry (id 3) matches.
query <- "SELECT * FROM app_logs WHERE level = 'ERROR'"
result <- dbGetQuery(db, query)
print(result)
#> id datetime level context msg data
#> 1 3 2024-08-19T01:32:49.135Z ERROR <NA> An error occurred <NA>
#> error
#> 1 ["[\\"Division by zero\\"]"]
# NOTE(review): the recorded `error` value looks like a JSON array wrapped in
# another JSON array/string rather than the plain text that was passed in;
# confirm whether this double encoding is intended.
Using Context
The Logger
class now supports a context feature, which
allows you to add persistent information to your log entries:
# Logger with an initial context. The recorded output below shows the context
# printed under "Context:" for each entry. Uses the `db` connection opened
# earlier in this document.
context_log <- Logger$new(
db_conn = db,
table_name = "context_logs",
context = list(app_name = "MyApp", version = "1.0.0")
)
context_log$info("Application started")
#> 2024-08-19T01:32:49.222Z INFO Application started
#> Context:
#> {
#> "app_name": "MyApp",
#> "version": "1.0.0"
#> }
# Update context
# The recorded output shows user_id added alongside the existing keys.
context_log$update_context(list(user_id = "12345"))
context_log$info("User logged in")
#> 2024-08-19T01:32:49.235Z INFO User logged in
#> Context:
#> {
#> "app_name": "MyApp",
#> "version": "1.0.0",
#> "user_id": "12345"
#> }
# Log an error with context
# Per-call `data` and the persistent context are printed as separate sections.
context_log$error("Operation failed", data = list(operation = "data_fetch"))
#> 2024-08-19T01:32:49.246Z ERROR Operation failed
#> Data:
#> {
#> "operation": "data_fetch"
#> }
#> Context:
#> {
#> "app_name": "MyApp",
#> "version": "1.0.0",
#> "user_id": "12345"
#> }
# Example of querying logs with context
query <- "SELECT * FROM context_logs WHERE json_extract(context, '$.user_id') = '12345'"
result <- dbGetQuery(db, query)
print(result)
#> [1] id datetime level context msg data error
#> <0 rows> (or 0-length row.names)
# NOTE(review): the recorded result is empty even though two entries were
# logged above with user_id = "12345". Presumably the context column is not
# stored as a plain JSON object that json_extract() can read; verify.
# Clear context
# The entry logged afterwards is recorded without a "Context:" section.
context_log$clear_context()
context_log$info("Context cleared")
#> 2024-08-19T01:32:49.262Z INFO Context cleared
Combining Features
You can combine various features of the Logger
class to
create a powerful logging system:
# Create a combined logger
# Combines a minimum level, a log file, a database table, a persistent
# context, a custom print function and a custom format function.
combined_log <- Logger$new(
  level = LogLevel$INFO,
  file_path = "combined_app.log",
  db_conn = db,
  table_name = "combined_logs",
  context = list(app_name = "CombinedApp", version = "2.0.0"),
  print_fn = messageParallel,
  format_fn = function(level, msg) {
    # Mask the key value before the message is logged. A POSIX class is used
    # because R's default regex engine does not treat `\s` as a whitespace
    # class inside a bracket expression, so `[^\\s]+` would not stop at the
    # first space. `[^[:space:]]+` works with and without `perl = TRUE`.
    msg <- gsub("API_KEY=[^[:space:]]+", "API_KEY=***", msg)
    paste(level, msg)
  }
)
# Log some messages
# One entry per level, with per-call `data` / `error` values where relevant.
combined_log$info("Application started")
combined_log$warn("Low memory", data = list(available_mb = 100))
combined_log$error("Database connection failed", error = "Connection timeout")
# Update context
combined_log$update_context(list(user_id = "67890"))
combined_log$info("User action", data = list(action = "button_click"))
# Example of a more complex query using context and data
# Filters on a context key and on a numeric value inside the `data` column.
query <- "
SELECT *
FROM combined_logs
WHERE json_extract(context, '$.app_name') = 'CombinedApp'
AND json_extract(data, '$.available_mb') < 200
"
result <- dbGetQuery(db, query)
print(result)
#> [1] id datetime level context msg data error
#> <0 rows> (or 0-length row.names)
# NOTE(review): the recorded result is empty although the "Low memory" entry
# above was logged with available_mb = 100 and app_name = "CombinedApp".
# Presumably the JSON columns are not stored in a form json_extract() can
# read directly; verify.
# Don't forget to close the database connection when you're done
dbDisconnect(db)
Using Helper Functions
The Logger
package includes several helper functions
that can be used in conjunction with the Logger
class to
provide more detailed information in your logs. Let’s explore how to use
these functions effectively.
Finding and Logging Data Issues
Suppose we have a dataset with some problematic values, and we want
to log where these issues occur. We can use the
valueCoordinates
function to locate the problematic values
and include this information in our log messages.
box::use(Logger[valueCoordinates])
# Create a sample dataset with some issues
# (one NA per column: a[2], b[3], c[4])
df <- data.frame(
a = c(1, NA, 3, 4, 5),
b = c(2, 4, NA, 8, 10),
c = c(3, 6, 9, NA, 15)
)
# Create a logger
log <- Logger$new()
# Find coordinates of NA values
na_coords <- valueCoordinates(df)
# Only log when at least one coordinate was returned.
if (nrow(na_coords) > 0) {
log$warn(
"NA values found in the dataset",
data = list(
na_locations = na_coords
)
)
}
# The recorded output lists one column/row index pair per NA value.
#> 2024-08-19T01:32:49.484Z WARNING NA values found in the dataset
#> Data:
#> {
#> "na_locations": [
#> {
#> "column": 1,
#> "row": 2
#> },
#> {
#> "column": 2,
#> "row": 3
#> },
#> {
#> "column": 3,
#> "row": 4
#> }
#> ]
#> }
The recorded output above shows the resulting log entry: each NA value is reported by its column and row index.
Logging Errors with Context
When an error occurs, it’s often useful to catch and log not just the
error message, but also the context in which the error occurred. Here’s
an example of how to do this using the Logger
class and
helper functions:
box::use(Logger[tableToString])
# Logger with default settings; process_data() below writes its entries to it.
log <- Logger$new()
# Divide element `a` of `df` by element `b`, logging any problems.
#
# df: a data frame (or list) with numeric elements `a` and `b`.
#
# Returns the element-wise quotient `df$a / df$b`. If any result is infinite,
# an error entry with the coordinates of the infinite values and a preview of
# `df` is written through `log`, a notice is printed, and the quotient is
# still returned. If the computation signals an error, the error is logged,
# its message is printed, and NULL is returned invisibly.
process_data <- function(df) {
  tryCatch({
    result <- df$a / df$b
    if (any(is.infinite(result))) {
      inf_coords <- valueCoordinates(data.frame(result), Inf)
      log$error(
        "Division by zero occurred",
        data = list(
          infinite_values = inf_coords,
          dataset_preview = tableToString(df)
        )
      )
      cat("Division by zero error")
    }
    result
  }, error = function(e) {
    msg <- conditionMessage(e)
    log$error(
      paste("An error occurred while processing data:", msg),
      data = list(dataset_preview = tableToString(df)),
      error = e
    )
    # `e` is a condition object (a list). cat() cannot print a list and would
    # itself fail inside the handler, so print the message text instead.
    cat(msg, "\n", sep = "")
    invisible(NULL)
  })
}
# Test the function with problematic data
# Rows 1 and 3 have b = 0, so a / b is Inf there (see the final output line).
df <- data.frame(a = c(1, 2, 3), b = c(0, 2, 0))
process_data(df)
# Recorded output: the logged error entry, the notice printed by cat(), and
# the returned vector.
#> 2024-08-19T01:32:49.588Z ERROR Division by zero occurred
#> Data:
#> {
#> "infinite_values": [
#> {
#> "column": 1,
#> "row": 1
#> },
#> {
#> "column": 1,
#> "row": 3
#> }
#> ],
#> "dataset_preview": " a b\n1 1 0\n2 2 2\n3 3 0"
#> }
#> Division by zero error
#> [1] Inf 1 Inf
Logging in Parallel Environments
When working with parallel processing, standard logging functions
might not work as expected. The Logger
package provides a
messageParallel
function to ensure messages are properly
logged from parallel processes:
box::use(future)
box::use(future.apply[future_lapply])
# Logger whose entries are printed through messageParallel
log <- Logger$new(print_fn = messageParallel)

# Evaluate futures in two background R sessions
future::plan(future::multisession, workers = 2)

result <- future_lapply(seq_len(5), function(item) {
  messageParallel(sprintf("Processing item %d", item))
  # Only the third item also writes a warning entry through the logger
  if (item == 3) log$warn(sprintf("Warning for item %d", item))
  item * 2
})

# Switch back to sequential evaluation once the parallel work is done
future::plan(future::sequential)
This ensures that messages from parallel processes are properly captured and logged.
Conclusion
The Logger
package provides a robust and flexible
logging system for R applications. With features like file logging,
database logging, and context management, you can create informative and
context-rich log messages that greatly aid in debugging and monitoring
your R scripts and applications.
Moreover, by using helper functions like valueCoordinates and tableToString, you can more easily track down and log data issues and errors, providing valuable information for troubleshooting and analysis.
Remember to adjust the log level, output file, database settings, and other parameters to suit your specific needs. The ability to query logs using SQL, especially with context-based filtering, makes it easy to analyse and troubleshoot issues in your applications.