feat: Implement core application structure, AI extraction, persistence, and Telegram bot modules with updated configuration and dependencies.

This commit is contained in:
Marco Gallegos
2025-12-18 12:15:04 -06:00
parent 7276e480b0
commit 899482580e
45 changed files with 1157 additions and 225 deletions

85
app/router.py Normal file
View File

@@ -0,0 +1,85 @@
"""
Main application router.
Orchestrates the entire expense processing workflow, from input to persistence.
"""
import logging
from app.schema.base import RawInput, ProvisionalExpense, FinalExpense, ExpenseStatus
from app.ingestion import text, image, audio, document
from app.ai import extractor, classifier
from app.persistence import repositories
from sqlalchemy.orm import Session
logger = logging.getLogger(__name__)
def process_expense_input(db: Session, raw_input: RawInput) -> FinalExpense:
"""
Full pipeline for processing a raw input.
1. Ingestion: Convert input (text, image, etc.) to raw text.
2. AI Extraction: Parse the raw text into structured data.
3. AI Classification/Audit: Validate and categorize the expense.
4. Persistence: Save the final, confirmed expense to the database.
"""
logger.info(f"Router processing input for user {raw_input.user_id} of type {raw_input.input_type}")
# 1. Ingestion
raw_text = ""
if raw_input.input_type == "text":
raw_text = text.process_text_input(raw_input.data)
elif raw_input.input_type == "image":
# In a real app, data would be bytes, not a string path
raw_text = image.process_image_input(raw_input.data.encode())
elif raw_input.input_type == "audio":
raw_text = audio.process_audio_input(raw_input.data.encode())
elif raw_input.input_type == "document":
raw_text = document.process_document_input(raw_input.data.encode())
else:
raise ValueError(f"Unsupported input type: {raw_input.input_type}")
if not raw_text:
logger.error("Ingestion phase resulted in empty text. Aborting.")
# We might want to return a specific status here
return None
# 2. AI Extraction
extracted_data = extractor.extract_expense_data(raw_text)
if not extracted_data.amount or not extracted_data.description:
logger.error("AI extraction failed to find key details. Aborting.")
return None
# 3. AI Classification & Confirmation (simplified)
# In a real bot, you would present this to the user for confirmation.
provisional_expense = ProvisionalExpense(
user_id=raw_input.user_id,
extracted_data=extracted_data,
confidence_score=0.0 # Will be set by classifier
)
audited_expense = classifier.classify_and_audit(provisional_expense)
# For now, we auto-confirm if confidence is high.
if audited_expense.confidence_score > 0.7:
final_expense = FinalExpense(
user_id=audited_expense.user_id,
provider_name=audited_expense.extracted_data.description, # Simplified mapping
amount=audited_expense.extracted_data.amount,
currency=audited_expense.extracted_data.currency,
expense_date=audited_expense.extracted_data.expense_date,
description=audited_expense.extracted_data.description,
category=audited_expense.category,
expense_type="personal", # Default
initial_processing_method=audited_expense.processing_method,
confirmed_by="auto-confirm"
)
# 4. Persistence
db_record = repositories.save_final_expense(db, final_expense)
logger.info(f"Successfully processed and saved expense ID {db_record.id}")
return db_record
else:
logger.warning(f"Expense for user {raw_input.user_id} has low confidence. Awaiting manual confirmation.")
# Here you would store the provisional expense and notify the user
return None