Files
talia_bot/bot/modules/file_validation.py
google-labs-jules[bot] 1ddd5af8d5 feat: Complete sprint tasks
- Add file upload security validation to prevent processing of potentially harmful files.
- Update python-telegram-bot to a pinned version and upgrade other dependencies.
- Implement Whisper transcription agent for voice message processing.
- Restore Google Calendar functionality with provided credentials.
2025-12-22 20:35:11 +00:00

92 lines
2.9 KiB
Python

# bot/modules/file_validation.py
# This module provides functions for validating files before processing.
import logging
from telegram import Document
# Set up logging
logger = logging.getLogger(__name__)
# --- Configuration ---
# Allow-list of MIME types the bot will accept. A document whose declared
# MIME type is not a member of this set is rejected, which prevents
# processing of potentially harmful files.
ALLOWED_MIME_TYPES = {
    # Documents
    'application/pdf',
    'application/msword',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'text/plain',
    # Images
    'image/jpeg',
    'image/png',
}

# Largest accepted upload, expressed in bytes: 10 MB.
MAX_FILE_SIZE_BYTES = 10 * 1024 * 1024
# --- Validation Functions ---
def is_file_type_allowed(document: Document) -> bool:
    """
    Report whether the document's declared MIME type is on the whitelist.

    Args:
        document: The Telegram Document object to check.

    Returns:
        True if ``document.mime_type`` is a member of ALLOWED_MIME_TYPES.
        False if it is not, or if the document or its MIME type is
        missing or empty.
    """
    # Nothing to validate without both a document and a MIME type.
    has_mime_type = bool(document) and bool(document.mime_type)
    if not has_mime_type:
        logger.warning("Document or its MIME type is missing.")
        return False

    logger.info(f"Validating file type: {document.mime_type}")

    is_allowed = document.mime_type in ALLOWED_MIME_TYPES
    if is_allowed:
        logger.info("File type is allowed.")
    else:
        logger.warning(f"File type '{document.mime_type}' is not in the allowed list.")
    return is_allowed
def is_file_size_acceptable(document: Document) -> bool:
    """
    Report whether the document's size fits within MAX_FILE_SIZE_BYTES.

    Args:
        document: The Telegram Document object to check.

    Returns:
        True if ``document.file_size`` is less than or equal to
        MAX_FILE_SIZE_BYTES. False if it is larger, or if the document or
        its file size is missing.
    """
    # A size of 0 is still a known size; only None counts as missing.
    size_known = bool(document) and document.file_size is not None
    if not size_known:
        logger.warning("Document or its file size is missing.")
        return False

    logger.info(f"Validating file size: {document.file_size} bytes.")

    within_limit = document.file_size <= MAX_FILE_SIZE_BYTES
    if within_limit:
        logger.info("File size is acceptable.")
    else:
        logger.warning(f"File size {document.file_size} exceeds the limit of {MAX_FILE_SIZE_BYTES} bytes.")
    return within_limit
def validate_document(document: Document) -> "tuple[bool, str]":
    """
    Performs all validation checks on a document.

    The MIME type is checked first, then the file size; the first failing
    check determines the returned message.

    Args:
        document: The Telegram Document object to validate. May be None, in
            which case the document is reported as invalid instead of
            raising.

    Returns:
        A tuple containing a boolean indicating if the document is valid,
        and a string message explaining the result.
    """
    if not is_file_type_allowed(document):
        # is_file_type_allowed also returns False when the document itself
        # or its MIME type is missing, so read the attribute defensively:
        # dereferencing a None document here would raise AttributeError.
        mime_type = getattr(document, "mime_type", None) or "unknown"
        return False, f"Unsupported file type: {mime_type}. Please upload a supported document."

    if not is_file_size_acceptable(document):
        # The size check also fails when the size is simply not reported;
        # telling the user the file is "too large" would be misleading then.
        if document.file_size is None:
            return False, "Could not determine the file size. Please try uploading the file again."
        return False, f"File is too large. The maximum allowed size is {MAX_FILE_SIZE_BYTES // 1024 // 1024} MB."

    return True, "File is valid and can be processed."