Source code for backend.core.utils.logger

"""Centralized Logging Configuration for FlowState.

This module provides a standardized logging infrastructure for the FlowState
application. It implements structured logging with appropriate handlers, formatters,
and log levels for different environments (development, production).

The logging system supports:
    - Structured JSON logging for production
    - Human-readable console logging for development
    - File-based log rotation
    - Context-aware logging with request IDs
    - Performance metrics logging
    - Error tracking with stack traces

Architecture:
    The logger uses Python's standard logging module with customized formatters
    and handlers. Log messages are enriched with contextual information such as
    timestamps, module names, and request IDs.

Log Levels:
    - DEBUG: Detailed diagnostic information for troubleshooting
    - INFO: General informational messages about application state
    - WARNING: Potentially harmful situations that don't prevent operation
    - ERROR: Error events that might still allow operation to continue
    - CRITICAL: Severe errors that may prevent operation

Best Practices:
    1. Use appropriate log levels for different types of messages
    2. Include relevant context in log messages
    3. Avoid logging sensitive information (passwords, tokens, PII)
    4. Use structured logging for production environments
    5. Log exceptions with stack traces for debugging

Usage:
    >>> from core.utils.logger import get_logger
    >>> logger = get_logger(__name__)
    >>> logger.info("Processing EEG data", extra={'user_id': 123})
    >>> logger.error("Failed to connect to Muse headband", exc_info=True)

Configuration:
    Environment variables control logging behavior:

    LOG_LEVEL : str
        Minimum log level to output (DEBUG, INFO, WARNING, ERROR, CRITICAL)
Default: INFO (set DEBUG explicitly for development)
    LOG_FORMAT : str
        Output format ('json' for structured, 'text' for human-readable)
Default: text (set json explicitly for production)
    LOG_FILE : str
        Path to log file for file-based logging
        Default: logs/flowstate.log
    LOG_MAX_BYTES : int
        Maximum size of log file before rotation (bytes)
        Default: 10485760 (10MB)
    LOG_BACKUP_COUNT : int
        Number of backup log files to keep
        Default: 5
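
    For example, to force verbose human-readable logs (illustrative values):

    >>> import os
    >>> os.environ['LOG_LEVEL'] = 'DEBUG'
    >>> os.environ['LOG_FORMAT'] = 'text'
    >>> setup_logging()  # setup_logging re-reads the environment on each call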

See Also:
    :mod:`logging`: Python standard logging module
    :class:`StructuredFormatter`: Custom JSON formatter for production logs

Examples:
    Basic logging:

    >>> logger = get_logger(__name__)
    >>> logger.info("Application started")
    >>> logger.debug("Configuration loaded", extra={'config_file': '.env'})

    Error logging with context:

    >>> try:
    ...     process_eeg_data(data)
    ... except Exception as e:
    ...     logger.error(
    ...         "EEG processing failed",
    ...         exc_info=True,
    ...         extra={'data_shape': data.shape, 'user_id': user.id}
    ...     )

    Performance logging:

    >>> import time
    >>> start = time.time()
    >>> result = expensive_operation()
    >>> duration = time.time() - start
    >>> logger.info(
    ...     "Operation completed",
    ...     extra={'operation': 'expensive_operation', 'duration_ms': duration * 1000}
    ... )

    Context manager for request logging:

    >>> with log_context(request_id=request.id, user_id=user.id):
    ...     logger.info("Processing request")
    ...     # All logs within this context will include request_id and user_id
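
    Contexts nest; inner values merge with and shadow outer ones:

    >>> with log_context(request_id='abc-123'):
    ...     with log_context(user_id=42):
    ...         logger.info("Includes request_id and user_id")
    ...     logger.info("Back to request_id only")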

Security Considerations:
    - Never log passwords, API keys, or authentication tokens
    - Sanitize user input before logging to prevent log injection
    - Limit logging of PII (personally identifiable information)
    - Use appropriate log levels to avoid excessive information disclosure
    - Ensure log files have restricted file permissions
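
    As a sketch of the sanitization point above, user-controlled strings can
    be neutralized before logging (the _sanitize helper is hypothetical, not
    part of this module):

    >>> def _sanitize(value: str) -> str:
    ...     # Collapse newlines so a crafted value cannot forge extra log lines
    ...     return ' '.join(value.splitlines())
    >>> logger.warning("Login failed", extra={'username': _sanitize(raw_name)})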

Performance Impact:
    - Logging overhead is typically < 1ms per log statement
    - File I/O through RotatingFileHandler is synchronous; when the size
      limit is reached, rotation also runs inline on the logging thread
    - JSON formatting adds roughly 0.5ms of overhead per message
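
    If logging latency becomes a concern, the standard library's QueueHandler
    and QueueListener can move handler I/O onto a background thread. A minimal
    sketch, not wired into setup_logging by default:

    >>> import logging, queue
    >>> from logging.handlers import QueueHandler, QueueListener
    >>> root = logging.getLogger()
    >>> q = queue.Queue(-1)
    >>> listener = QueueListener(q, *root.handlers)
    >>> root.handlers = [QueueHandler(q)]
    >>> listener.start()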

Note:
    Loggers are cached by name, so calling get_logger with the same name
    multiple times returns the same logger instance.
"""

import logging
import logging.handlers
import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional
from contextvars import ContextVar

# Context variables for request-scoped logging
_log_context: ContextVar[Dict[str, Any]] = ContextVar('log_context', default={})
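# Note: the ContextVar default dict is created once and shared; log_context
# below never mutates it in place, always installing a new merged dict via
# _log_context.set().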


class StructuredFormatter(logging.Formatter):
    """JSON formatter for structured logging in production.

    Converts log records to JSON format with consistent structure and
    enriched metadata. This enables efficient log parsing and analysis by
    log aggregation systems like ELK, Splunk, or CloudWatch.

    Attributes:
        None (uses standard logging.Formatter attributes)

    Output Format:
        Each log message is formatted as a JSON object with the following
        fields:

        .. code-block:: json

            {
                "timestamp": "2024-11-08T15:30:45.123Z",
                "level": "INFO",
                "logger": "core.services.eeg_processor",
                "message": "Processing EEG data",
                "module": "eeg_processor",
                "function": "process_chunk",
                "line": 145,
                "thread": "MainThread",
                "process": 12345,
                "extra": {
                    "user_id": 123,
                    "request_id": "abc-123-def"
                }
            }

    Example:
        >>> formatter = StructuredFormatter()
        >>> handler = logging.StreamHandler()
        >>> handler.setFormatter(formatter)
        >>> logger = logging.getLogger('test')
        >>> logger.addHandler(handler)
        >>> logger.info("Test message", extra={'key': 'value'})

    Note:
        Exception information is automatically included when exc_info=True
        is passed to the logging call.
    """
    def format(self, record: logging.LogRecord) -> str:
        """Format log record as JSON string.

        Parameters
        ----------
        record : logging.LogRecord
            The log record to format

        Returns
        -------
        str
            JSON-formatted log message

        Note
        ----
        Extra fields from the record are merged into the output JSON.
        Non-JSON-serializable extra values are converted with str().
        """
        # Base log structure; timestamps are ISO 8601 in UTC
        log_data = {
            'timestamp': datetime.fromtimestamp(
                record.created, tz=timezone.utc
            ).isoformat().replace('+00:00', 'Z'),
            'level': record.levelname,
            'logger': record.name,
            'message': record.getMessage(),
            'module': record.module,
            'function': record.funcName,
            'line': record.lineno,
            'thread': record.threadName,
            'process': record.process,
        }

        # Add exception information if present
        if record.exc_info:
            log_data['exception'] = {
                'type': record.exc_info[0].__name__,
                'message': str(record.exc_info[1]),
                'traceback': self.formatException(record.exc_info),
            }

        # Collect extra fields: anything on the record that is not a
        # standard LogRecord attribute was passed via the `extra` kwarg
        standard_attrs = {
            'name', 'msg', 'args', 'created', 'filename', 'funcName',
            'levelname', 'levelno', 'lineno', 'module', 'msecs', 'message',
            'pathname', 'process', 'processName', 'relativeCreated',
            'thread', 'threadName', 'exc_info', 'exc_text', 'stack_info',
            'taskName', 'asctime',
        }
        extra = {
            key: value for key, value in record.__dict__.items()
            if key not in standard_attrs
        }

        # Merge request-scoped context variables
        context = _log_context.get()
        if context:
            extra.update(context)

        if extra:
            log_data['extra'] = extra

        # default=str guards against non-JSON-serializable extras
        return json.dumps(log_data, default=str)
class ColoredConsoleFormatter(logging.Formatter):
    """Colored console formatter for development.

    Provides human-readable colored output for console logging during
    development. Different log levels are displayed in different colors
    for easy visual scanning.

    Colors:
        - DEBUG: Cyan
        - INFO: Green
        - WARNING: Yellow
        - ERROR: Red
        - CRITICAL: Red background

    Example:
        >>> formatter = ColoredConsoleFormatter()
        >>> handler = logging.StreamHandler()
        >>> handler.setFormatter(formatter)
    """

    COLORS = {
        'DEBUG': '\033[36m',     # Cyan
        'INFO': '\033[32m',      # Green
        'WARNING': '\033[33m',   # Yellow
        'ERROR': '\033[31m',     # Red
        'CRITICAL': '\033[41m',  # Red background
    }
    RESET = '\033[0m'
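
    # Note: ANSI escape codes assume a terminal that renders them; on
    # consoles without ANSI support the sequences appear literally.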
    def format(self, record: logging.LogRecord) -> str:
        """Format log record with colors for console output.

        Parameters
        ----------
        record : logging.LogRecord
            The log record to format

        Returns
        -------
        str
            Colored formatted log message
        """
        level_color = self.COLORS.get(record.levelname, '')
        # Colorize the level name, then restore it so the record is left
        # unmodified for any other handlers that share it
        original_levelname = record.levelname
        record.levelname = f"{level_color}{original_levelname}{self.RESET}"
        try:
            return super().format(record)
        finally:
            record.levelname = original_levelname
def setup_logging(
    level: Optional[str] = None,
    format_type: Optional[str] = None,
    log_file: Optional[str] = None
) -> None:
    """Configure application-wide logging settings.

    Sets up logging handlers, formatters, and filters based on environment
    configuration. Should be called once at application startup.

    Parameters
    ----------
    level : str, optional
        Minimum log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
        If not provided, reads from the LOG_LEVEL environment variable.
    format_type : str, optional
        Log format type ('json' or 'text').
        If not provided, reads from the LOG_FORMAT environment variable.
    log_file : str, optional
        Path to log file for file-based logging.
        If not provided, reads from the LOG_FILE environment variable.

    Raises
    ------
    ValueError
        If an invalid log level or format type is provided

    Example
    -------
    >>> # Basic setup for development
    >>> setup_logging(level='DEBUG', format_type='text')
    >>>
    >>> # Production setup with file logging
    >>> setup_logging(
    ...     level='INFO',
    ...     format_type='json',
    ...     log_file='/var/log/flowstate/app.log'
    ... )

    Note
    ----
    This function modifies the root logger configuration, affecting all
    loggers in the application unless they have propagate=False.
    """
    # Determine configuration, preferring explicit arguments over environment
    log_level = level or os.getenv('LOG_LEVEL', 'INFO')
    log_format = format_type or os.getenv('LOG_FORMAT', 'text')
    file_path = log_file or os.getenv('LOG_FILE', 'logs/flowstate.log')

    # Validate log level
    numeric_level = getattr(logging, log_level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError(f'Invalid log level: {log_level}')

    # Validate format type
    if log_format not in ('json', 'text'):
        raise ValueError(f'Invalid log format: {log_format}')

    # Get root logger and replace any existing handlers
    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)
    root_logger.handlers = []

    # Configure console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(numeric_level)

    if log_format == 'json':
        console_handler.setFormatter(StructuredFormatter())
    else:
        console_formatter = ColoredConsoleFormatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        console_handler.setFormatter(console_formatter)

    root_logger.addHandler(console_handler)

    # Configure file handler with rotation; file logs are always structured
    if file_path:
        log_dir = Path(file_path).parent
        log_dir.mkdir(parents=True, exist_ok=True)

        max_bytes = int(os.getenv('LOG_MAX_BYTES', 10 * 1024 * 1024))  # 10MB
        backup_count = int(os.getenv('LOG_BACKUP_COUNT', 5))

        file_handler = logging.handlers.RotatingFileHandler(
            file_path,
            maxBytes=max_bytes,
            backupCount=backup_count,
            encoding='utf-8',
        )
        file_handler.setLevel(numeric_level)
        file_handler.setFormatter(StructuredFormatter())
        root_logger.addHandler(file_handler)
def get_logger(name: str) -> logging.Logger:
    """Get a logger instance for the specified module.

    Returns a configured logger instance for the given name. Loggers are
    cached, so multiple calls with the same name return the same instance.

    Parameters
    ----------
    name : str
        Logger name, typically __name__ of the calling module

    Returns
    -------
    logging.Logger
        Configured logger instance

    Example
    -------
    >>> logger = get_logger(__name__)
    >>> logger.info("Module initialized")
    >>> logger.debug("Debug information", extra={'key': 'value'})

    Note
    ----
    Always use __name__ as the logger name to maintain a clear module
    hierarchy in log messages.
    """
    return logging.getLogger(name)
class log_context:
    """Context manager for adding context to all log messages within a scope.

    Useful for adding request IDs, user IDs, or other contextual information
    that should be included in all log messages within a specific scope.

    Parameters
    ----------
    **kwargs
        Key-value pairs to add to the log context

    Example
    -------
    >>> with log_context(request_id='abc-123', user_id=42):
    ...     logger.info("Processing request")
    ...     # Log will include request_id and user_id

    Note
    ----
    Context is thread-safe via ContextVar; each thread or async task
    maintains its own context.
    """
    def __init__(self, **kwargs):
        """Initialize context with key-value pairs."""
        self.context = kwargs
        self.token = None

    def __enter__(self):
        """Enter context and merge the new values into the current context."""
        current = _log_context.get()
        new_context = {**current, **self.context}
        self.token = _log_context.set(new_context)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit context and restore the previous context."""
        if self.token:
            _log_context.reset(self.token)
# Initialize logging on module import
setup_logging()
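

# Minimal smoke-test sketch (assumption: this module is run directly,
# e.g. `python logger.py`; nothing below runs on import)
if __name__ == '__main__':
    demo_logger = get_logger(__name__)
    with log_context(request_id='demo-123'):
        demo_logger.info("log_context demo", extra={'user_id': 42})
    try:
        1 / 0
    except ZeroDivisionError:
        demo_logger.error("Demo error with traceback", exc_info=True)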