mirror of
https://github.com/marcogll/talia_bot.git
synced 2026-01-13 13:25:19 +00:00
Merge pull request #54 from marcogll/feature/sprint-updates-20240112-15660961988964187929
Complete Sprint Tasks and Restore Google Calendar
This commit is contained in:
10
bot/main.py
10
bot/main.py
@@ -42,7 +42,8 @@ from bot.modules.vikunja import vikunja_conv_handler, get_projects_list, get_tas
|
|||||||
from bot.modules.printer import send_file_to_printer, check_print_status
|
from bot.modules.printer import send_file_to_printer, check_print_status
|
||||||
from bot.db import setup_database
|
from bot.db import setup_database
|
||||||
from bot.modules.flow_engine import FlowEngine
|
from bot.modules.flow_engine import FlowEngine
|
||||||
from bot.modules.llm_engine import transcribe_audio
|
from bot.modules.transcription import transcribe_audio
|
||||||
|
from bot.modules.file_validation import validate_document
|
||||||
|
|
||||||
from bot.scheduler import schedule_daily_summary
|
from bot.scheduler import schedule_daily_summary
|
||||||
|
|
||||||
@@ -169,6 +170,13 @@ async def handle_document(update: Update, context: ContextTypes.DEFAULT_TYPE) ->
|
|||||||
"""Handles documents sent to the bot for printing."""
|
"""Handles documents sent to the bot for printing."""
|
||||||
document = update.message.document
|
document = update.message.document
|
||||||
user_id = update.effective_user.id
|
user_id = update.effective_user.id
|
||||||
|
|
||||||
|
# Validate the document before processing
|
||||||
|
is_valid, message = validate_document(document)
|
||||||
|
if not is_valid:
|
||||||
|
await update.message.reply_text(message)
|
||||||
|
return
|
||||||
|
|
||||||
file = await context.bot.get_file(document.file_id)
|
file = await context.bot.get_file(document.file_id)
|
||||||
|
|
||||||
# Create a directory for temporary files if it doesn't exist
|
# Create a directory for temporary files if it doesn't exist
|
||||||
|
|||||||
91
bot/modules/file_validation.py
Normal file
91
bot/modules/file_validation.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
# bot/modules/file_validation.py
|
||||||
|
# This module provides functions for validating files before processing.
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from telegram import Document
|
||||||
|
|
||||||
|
# Set up logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# --- Configuration ---
|
||||||
|
# Allow-list of MIME types the bot accepts. Anything outside this set is
# rejected so that potentially harmful files are never processed.
ALLOWED_MIME_TYPES = {
    "application/pdf",
    "application/msword",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "text/plain",
    "image/jpeg",
    "image/png",
}

# Upper bound on the accepted file size, expressed in bytes (10 MB).
MAX_FILE_SIZE_BYTES = 10 * 1024 ** 2
|
||||||
|
|
||||||
|
# --- Validation Functions ---
|
||||||
|
|
||||||
|
def is_file_type_allowed(document: Document) -> bool:
    """
    Report whether the document's MIME type appears in ALLOWED_MIME_TYPES.

    Args:
        document: The Telegram Document object to inspect.

    Returns:
        True when the MIME type is in the whitelist. False when it is not,
        or when the document or its MIME type is missing.
    """
    # Guard clause: nothing to validate without a document and a MIME type.
    if not (document and document.mime_type):
        logger.warning("Document or its MIME type is missing.")
        return False

    logger.info(f"Validating file type: {document.mime_type}")

    if document.mime_type not in ALLOWED_MIME_TYPES:
        logger.warning(f"File type '{document.mime_type}' is not in the allowed list.")
        return False

    logger.info("File type is allowed.")
    return True
|
||||||
|
|
||||||
|
def is_file_size_acceptable(document: Document) -> bool:
    """
    Report whether the document's size is at most MAX_FILE_SIZE_BYTES.

    Args:
        document: The Telegram Document object to inspect.

    Returns:
        True when the file size is within the limit. False when it exceeds
        the limit, or when the document or its file size is missing.
    """
    # Guard clause: a missing document or an unknown size is rejected.
    if not (document and document.file_size is not None):
        logger.warning("Document or its file size is missing.")
        return False

    logger.info(f"Validating file size: {document.file_size} bytes.")

    if document.file_size > MAX_FILE_SIZE_BYTES:
        logger.warning(f"File size {document.file_size} exceeds the limit of {MAX_FILE_SIZE_BYTES} bytes.")
        return False

    logger.info("File size is acceptable.")
    return True
|
||||||
|
|
||||||
|
def validate_document(document: Document) -> (bool, str):
    """
    Run every validation check on a document and report the first failure.

    The MIME type is checked first, then the file size.

    Args:
        document: The Telegram Document object to validate. A missing
            (None) document is reported as invalid rather than raising.

    Returns:
        A tuple containing a boolean indicating if the document is valid,
        and a string message explaining the result.
    """
    if not is_file_type_allowed(document):
        # The type check also fails when the document itself is missing, so
        # guard the attribute access instead of raising AttributeError.
        mime_type = document.mime_type if document else None
        return False, f"Unsupported file type: {mime_type}. Please upload a supported document."

    if not is_file_size_acceptable(document):
        # The size check rejects an unknown size as well as an oversized
        # file; only the latter should be reported as "too large".
        if document.file_size is None:
            return False, "Could not determine the file size. Please try sending the file again."
        return False, f"File is too large. The maximum allowed size is {MAX_FILE_SIZE_BYTES // 1024 // 1024} MB."

    return True, "File is valid and can be processed."
|
||||||
@@ -33,24 +33,3 @@ def get_smart_response(prompt):
|
|||||||
# Si algo sale mal, devolvemos el error
|
# Si algo sale mal, devolvemos el error
|
||||||
return f"Ocurrió un error al comunicarse con OpenAI: {e}"
|
return f"Ocurrió un error al comunicarse con OpenAI: {e}"
|
||||||
|
|
||||||
def transcribe_audio(audio_file_path):
    """
    Transcribe an audio file with OpenAI's "whisper-1" model.

    Parameters:
    - audio_file_path: The path to the audio file.

    Returns the transcribed text. When OPENAI_API_KEY is not set, or when
    anything raises during transcription, an error string is returned instead.
    """
    # Guard clause: without an API key no request can be made.
    if not OPENAI_API_KEY:
        return "Error: OPENAI_API_KEY is not configured."

    try:
        whisper_client = openai.OpenAI(api_key=OPENAI_API_KEY)
        with open(audio_file_path, "rb") as audio_stream:
            result = whisper_client.audio.transcriptions.create(model="whisper-1", file=audio_stream)
        return result.text
    except Exception as e:
        return f"Error during audio transcription: {e}"
|
|
||||||
|
|||||||
47
bot/modules/transcription.py
Normal file
47
bot/modules/transcription.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# bot/modules/transcription.py
|
||||||
|
# This module handles audio transcription using the Whisper API.
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from openai import OpenAI
|
||||||
|
from bot.config import OPENAI_API_KEY
|
||||||
|
|
||||||
|
# Set up logging
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Create the module-wide OpenAI client at import time. When no API key is
# configured, an error is logged and the client stays None.
if OPENAI_API_KEY:
    client = OpenAI(api_key=OPENAI_API_KEY)
else:
    logger.error("OPENAI_API_KEY is not configured in environment variables.")
    client = None
|
||||||
|
|
||||||
|
def transcribe_audio(file_path: str) -> str:
    """
    Transcribe an audio file using the Whisper API ("whisper-1" model).

    Args:
        file_path: The path to the audio file to transcribe.

    Returns:
        The transcribed text, or an "Error: ..." message when the client is
        not configured, the file does not exist, or transcription fails.
    """
    if not client:
        return "Error: OpenAI API key is not configured."

    if not os.path.exists(file_path):
        logger.error("Audio file not found at: %s", file_path)
        return "Error: Audio file not found."

    try:
        logger.info("Transcribing audio from: %s", file_path)
        with open(file_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
        logger.info("Transcription successful.")
        return transcript.text
    except Exception as e:
        # Boundary handler: the caller only gets a generic message, so log
        # with logger.exception to keep the full traceback for diagnosis.
        logger.exception("Error during audio transcription: %s", e)
        return "Error: Could not transcribe audio."
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
python-telegram-bot[job-queue]<22
|
python-telegram-bot[job-queue]==21.1.1
|
||||||
requests
|
requests
|
||||||
schedule
|
schedule
|
||||||
google-api-python-client
|
google-api-python-client
|
||||||
@@ -7,3 +7,4 @@ google-auth-oauthlib
|
|||||||
openai
|
openai
|
||||||
pytz
|
pytz
|
||||||
python-dotenv
|
python-dotenv
|
||||||
|
ffmpeg-python
|
||||||
|
|||||||
Reference in New Issue
Block a user