feat: Implement core application structure, AI extraction, persistence, and Telegram bot modules with updated configuration and dependencies.

2026-01-13 13:25:15 +00:00 · 2025-12-18 12:15:04 -06:00
parent 7276e480b0
commit 899482580e
45 changed files with 1157 additions and 225 deletions
--- a/app/router.py
+++ b/app/router.py
@@ -0,0 +1,85 @@
+"""
+Main application router.
+
+Orchestrates the entire expense processing workflow, from input to persistence.
+"""
+import logging
+from typing import Optional
+
+from sqlalchemy.orm import Session
+
+from app.schema.base import RawInput, ProvisionalExpense, FinalExpense, ExpenseStatus
+from app.ingestion import text, image, audio, document
+from app.ai import extractor, classifier
+from app.persistence import repositories
+
+logger = logging.getLogger(__name__)
+
+def process_expense_input(db: Session, raw_input: RawInput) -> FinalExpense:
+    """
+    Full pipeline for processing a raw input.
+
+    1. Ingestion: Convert input (text, image, etc.) to raw text.
+    2. AI Extraction: Parse the raw text into structured data.
+    3. AI Classification/Audit: Validate and categorize the expense.
+    4. Persistence: Save the final, confirmed expense to the database.
+    """
+    logger.info(f"Router processing input for user {raw_input.user_id} of type {raw_input.input_type}")
+
+    # 1. Ingestion
+    raw_text = ""
+    if raw_input.input_type == "text":
+        raw_text = text.process_text_input(raw_input.data)
+    elif raw_input.input_type == "image":
+        # In a real app, data would be bytes, not a string path
+        raw_text = image.process_image_input(raw_input.data.encode()) 
+    elif raw_input.input_type == "audio":
+        raw_text = audio.process_audio_input(raw_input.data.encode())
+    elif raw_input.input_type == "document":
+        raw_text = document.process_document_input(raw_input.data.encode())
+    else:
+        raise ValueError(f"Unsupported input type: {raw_input.input_type}")
+
+    if not raw_text:
+        logger.error("Ingestion phase resulted in empty text. Aborting.")
+        # We might want to return a specific status here
+        return None
+
+    # 2. AI Extraction
+    extracted_data = extractor.extract_expense_data(raw_text)
+    if not extracted_data.amount or not extracted_data.description:
+        logger.error("AI extraction failed to find key details. Aborting.")
+        return None
+
+    # 3. AI Classification & Confirmation (simplified)
+    # In a real bot, you would present this to the user for confirmation.
+    provisional_expense = ProvisionalExpense(
+        user_id=raw_input.user_id,
+        extracted_data=extracted_data,
+        confidence_score=0.0 # Will be set by classifier
+    )
+    
+    audited_expense = classifier.classify_and_audit(provisional_expense)
+    
+    # For now, we auto-confirm if confidence is high.
+    if audited_expense.confidence_score > 0.7:
+        final_expense = FinalExpense(
+            user_id=audited_expense.user_id,
+            provider_name=audited_expense.extracted_data.description, # Simplified mapping
+            amount=audited_expense.extracted_data.amount,
+            currency=audited_expense.extracted_data.currency,
+            expense_date=audited_expense.extracted_data.expense_date,
+            description=audited_expense.extracted_data.description,
+            category=audited_expense.category,
+            expense_type="personal", # Default
+            initial_processing_method=audited_expense.processing_method,
+            confirmed_by="auto-confirm"
+        )
+        
+        # 4. Persistence
+        db_record = repositories.save_final_expense(db, final_expense)
+        logger.info(f"Successfully processed and saved expense ID {db_record.id}")
+        return db_record
+    
+    else:
+        logger.warning(f"Expense for user {raw_input.user_id} has low confidence. Awaiting manual confirmation.")
+        # Here you would store the provisional expense and notify the user
+        return None