feat: Implement core application structure, AI extraction, persistence, and Telegram bot modules with updated configuration and dependencies.

This commit is contained in:
Marco Gallegos
2025-12-18 12:15:04 -06:00
parent 7276e480b0
commit 899482580e
45 changed files with 1157 additions and 225 deletions

0
app/ai/__init__.py Normal file
View File

42
app/ai/classifier.py Normal file
View File

@@ -0,0 +1,42 @@
"""
AI-powered classification and confidence scoring.
"""
import openai
import json
import logging
from typing import Dict, Any
from app.config import config
from app.ai.prompts import AUDITOR_PROMPT
from app.schema.base import ProvisionalExpense
# Configure the OpenAI client
openai.api_key = config.OPENAI_API_KEY
logger = logging.getLogger(__name__)
def classify_and_audit(expense: ProvisionalExpense) -> ProvisionalExpense:
    """
    Audit an extracted expense, attaching a confidence score and notes.

    Placeholder implementation: no model call happens yet. The expense is
    auto-approved with a fixed high confidence until real classification
    and validation logic replaces this stub.

    Args:
        expense: A ProvisionalExpense object with extracted data.

    Returns:
        The same ProvisionalExpense object, updated with the audit findings.
    """
    logger.info(f"Starting AI audit for expense: {expense.extracted_data.description}")

    # Stub behavior until an auditor model (see AUDITOR_PROMPT) is wired in:
    # mark the record as AI-processed and auto-approve it.
    expense.processing_method = "ai_inference"  # Assume AI was used
    expense.confidence_score = 0.95
    expense.validation_notes.append("AI audit placeholder: auto-approved.")

    logger.info("AI audit placeholder complete.")
    return expense

16
app/ai/confidence.py Normal file
View File

@@ -0,0 +1,16 @@
"""
Functions for calculating confidence scores.
"""
def calculate_confidence(extracted_data: dict) -> float:
    """
    Calculate a heuristic confidence score for extracted expense data.

    The score starts at 1.0 and is reduced for each key field that is
    missing, then clamped so it never drops below 0.0.

    Args:
        extracted_data: Mapping of extracted expense fields (expects at
            least "amount" and "description" keys when available).

    Returns:
        A confidence score in the range [0.0, 1.0].
    """
    score = 1.0
    # A genuine amount of 0 is still a value — only penalize when the
    # field is absent or empty, not merely falsy.
    if extracted_data.get("amount") in (None, ""):
        score -= 0.5
    if not extracted_data.get("description"):
        score -= 0.3
    return max(0.0, score)

60
app/ai/extractor.py Normal file
View File

@@ -0,0 +1,60 @@
"""
AI-powered data extraction from raw text.
"""
import openai
import json
import logging
from typing import Dict, Any
from app.config import config
from app.ai.prompts import EXTRACTOR_PROMPT
from app.schema.base import ExtractedExpense
# Configure the OpenAI client
openai.api_key = config.OPENAI_API_KEY
logger = logging.getLogger(__name__)
def extract_expense_data(text: str) -> ExtractedExpense:
    """
    Uses an AI model to extract structured expense data from a raw text string.

    Args:
        text: The raw text from user input, OCR, or transcription.

    Returns:
        An ExtractedExpense object with the data found by the AI, or an
        ExtractedExpense carrying only ``raw_text`` when extraction fails,
        so the entry can be routed to manual review.
    """
    logger.info(f"Starting AI extraction for text: '{text[:100]}...'")
    try:
        # NOTE(review): response_format={"type": "json_object"} requires a
        # model that supports JSON mode (e.g. gpt-3.5-turbo-1106+) — confirm
        # the deployed model accepts this parameter.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Or another suitable model
            messages=[
                {"role": "system", "content": EXTRACTOR_PROMPT},
                {"role": "user", "content": text}
            ],
            temperature=0.0,  # Deterministic output for structured extraction
            response_format={"type": "json_object"}
        )
        # The response from OpenAI should be a JSON string in the message content
        json_response = response.choices[0].message['content']
        extracted_data = json.loads(json_response)
        logger.info(f"AI extraction successful. Raw JSON: {extracted_data}")
        # Add the original text to the model for audit purposes
        extracted_data['raw_text'] = text
        return ExtractedExpense(**extracted_data)
    except json.JSONDecodeError:
        # logger.exception records the traceback alongside the message,
        # which logger.error(f"...{e}") would have discarded.
        logger.exception("Failed to decode JSON from AI response")
        # Return a model with only the raw text for manual review
        return ExtractedExpense(raw_text=text)
    except Exception:
        # Top-level boundary: log with traceback and fall back gracefully.
        logger.exception("An unexpected error occurred during AI extraction")
        # Return a model with only the raw text
        return ExtractedExpense(raw_text=text)

56
app/ai/prompts.py Normal file
View File

@@ -0,0 +1,56 @@
"""
Version-controlled prompts for AI agents.
"""
# Prompt for the "Extractor" AI agent, which pulls structured data from raw text.
EXTRACTOR_PROMPT = """
You are a highly specialized AI assistant for expense tracking. Your task is to extract structured information from a given text. The text is a user's expense entry.
From the text, extract the following fields:
- "amount": The numeric value of the expense.
- "currency": The currency code (e.g., USD, EUR, CLP). If not specified, assume 'EUR'.
- "description": A brief description of what the expense was for.
- "date": The date of the expense in YYYY-MM-DD format. If not specified, use today's date.
- "category": The category of the expense (e.g., Food, Transport, Shopping, Rent, Utilities). If you cannot determine it, use 'Other'.
Respond ONLY with a valid JSON object containing these fields. Do not add any explanation or conversational text.
Example Text: "lunch with colleagues today, 25.50 eur"
Example JSON:
{
"amount": 25.50,
"currency": "EUR",
"description": "Lunch with colleagues",
"date": "2025-12-18",
"category": "Food"
}
"""
# Prompt for a "Classifier" or "Auditor" agent, which could validate the extraction.
# This is a placeholder for a potential future agent.
AUDITOR_PROMPT = """
You are an auditing AI. Your task is to review an expense record and determine its validity and compliance.
For the given JSON of an expense, check the following:
- Is the amount reasonable?
- Is the description clear?
- Is the category appropriate?
Based on your analysis, provide a "confidence_score" between 0.0 and 1.0 and a brief "audit_notes" string.
Respond ONLY with a valid JSON object.
Example Input JSON:
{
"amount": 25.50,
"currency": "EUR",
"description": "Lunch with colleagues",
"date": "2025-12-18",
"category": "Food"
}
Example Output JSON:
{
"confidence_score": 0.95,
"audit_notes": "The expense seems valid and well-categorized."
}
"""