feat: Implement core application structure, AI extraction, persistence, and Telegram bot modules with updated configuration and dependencies.

This commit is contained in:
Marco Gallegos
2025-12-18 12:15:04 -06:00
parent 7276e480b0
commit 899482580e
45 changed files with 1157 additions and 225 deletions

0
app/ai/__init__.py Normal file
View File

42
app/ai/classifier.py Normal file
View File

@@ -0,0 +1,42 @@
"""
AI-powered classification and confidence scoring.
"""
import openai
import json
import logging
from typing import Dict, Any
from app.config import config
from app.ai.prompts import AUDITOR_PROMPT
from app.schema.base import ProvisionalExpense
# Configure the OpenAI client
openai.api_key = config.OPENAI_API_KEY
logger = logging.getLogger(__name__)
def classify_and_audit(expense: ProvisionalExpense) -> ProvisionalExpense:
    """
    Audit an extracted expense, attaching a confidence score and notes.

    Placeholder implementation: no model call happens yet. The expense is
    auto-approved with a fixed high confidence until real classification
    and validation logic replaces this stub.

    Args:
        expense: A ProvisionalExpense object with extracted data.

    Returns:
        The same ProvisionalExpense object, updated with the audit findings.
    """
    logger.info(f"Starting AI audit for expense: {expense.extracted_data.description}")

    # Stub behavior until an auditor model (see AUDITOR_PROMPT) is wired in:
    # mark the record as AI-processed and auto-approve it.
    expense.processing_method = "ai_inference"  # Assume AI was used
    expense.confidence_score = 0.95
    expense.validation_notes.append("AI audit placeholder: auto-approved.")

    logger.info("AI audit placeholder complete.")
    return expense

16
app/ai/confidence.py Normal file
View File

@@ -0,0 +1,16 @@
"""
Functions for calculating confidence scores.
"""
def calculate_confidence(extracted_data: dict) -> float:
    """
    Calculate a heuristic confidence score for extracted expense data.

    The score starts at 1.0 and is reduced for each key field that is
    missing, then clamped so it never drops below 0.0.

    Args:
        extracted_data: Mapping of extracted expense fields (expects at
            least "amount" and "description" keys when available).

    Returns:
        A confidence score in the range [0.0, 1.0].
    """
    score = 1.0
    # A genuine amount of 0 is still a value — only penalize when the
    # field is absent or empty, not merely falsy.
    if extracted_data.get("amount") in (None, ""):
        score -= 0.5
    if not extracted_data.get("description"):
        score -= 0.3
    return max(0.0, score)

60
app/ai/extractor.py Normal file
View File

@@ -0,0 +1,60 @@
"""
AI-powered data extraction from raw text.
"""
import openai
import json
import logging
from typing import Dict, Any
from app.config import config
from app.ai.prompts import EXTRACTOR_PROMPT
from app.schema.base import ExtractedExpense
# Configure the OpenAI client
openai.api_key = config.OPENAI_API_KEY
logger = logging.getLogger(__name__)
def extract_expense_data(text: str) -> ExtractedExpense:
    """
    Uses an AI model to extract structured expense data from a raw text string.

    Args:
        text: The raw text from user input, OCR, or transcription.

    Returns:
        An ExtractedExpense object with the data found by the AI, or an
        ExtractedExpense carrying only ``raw_text`` when extraction fails,
        so the entry can be routed to manual review.
    """
    logger.info(f"Starting AI extraction for text: '{text[:100]}...'")
    try:
        # NOTE(review): response_format={"type": "json_object"} requires a
        # model that supports JSON mode (e.g. gpt-3.5-turbo-1106+) — confirm
        # the deployed model accepts this parameter.
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Or another suitable model
            messages=[
                {"role": "system", "content": EXTRACTOR_PROMPT},
                {"role": "user", "content": text}
            ],
            temperature=0.0,  # Deterministic output for structured extraction
            response_format={"type": "json_object"}
        )
        # The response from OpenAI should be a JSON string in the message content
        json_response = response.choices[0].message['content']
        extracted_data = json.loads(json_response)
        logger.info(f"AI extraction successful. Raw JSON: {extracted_data}")
        # Add the original text to the model for audit purposes
        extracted_data['raw_text'] = text
        return ExtractedExpense(**extracted_data)
    except json.JSONDecodeError:
        # logger.exception records the traceback alongside the message,
        # which logger.error(f"...{e}") would have discarded.
        logger.exception("Failed to decode JSON from AI response")
        # Return a model with only the raw text for manual review
        return ExtractedExpense(raw_text=text)
    except Exception:
        # Top-level boundary: log with traceback and fall back gracefully.
        logger.exception("An unexpected error occurred during AI extraction")
        # Return a model with only the raw text
        return ExtractedExpense(raw_text=text)

56
app/ai/prompts.py Normal file
View File

@@ -0,0 +1,56 @@
"""
Version-controlled prompts for AI agents.
"""
# Prompt for the "Extractor" AI agent, which pulls structured data from raw text.
EXTRACTOR_PROMPT = """
You are a highly specialized AI assistant for expense tracking. Your task is to extract structured information from a given text. The text is a user's expense entry.
From the text, extract the following fields:
- "amount": The numeric value of the expense.
- "currency": The currency code (e.g., USD, EUR, CLP). If not specified, assume 'EUR'.
- "description": A brief description of what the expense was for.
- "date": The date of the expense in YYYY-MM-DD format. If not specified, use today's date.
- "category": The category of the expense (e.g., Food, Transport, Shopping, Rent, Utilities). If you cannot determine it, use 'Other'.
Respond ONLY with a valid JSON object containing these fields. Do not add any explanation or conversational text.
Example Text: "lunch with colleagues today, 25.50 eur"
Example JSON:
{
"amount": 25.50,
"currency": "EUR",
"description": "Lunch with colleagues",
"date": "2025-12-18",
"category": "Food"
}
"""
# Prompt for a "Classifier" or "Auditor" agent, which could validate the extraction.
# This is a placeholder for a potential future agent.
AUDITOR_PROMPT = """
You are an auditing AI. Your task is to review an expense record and determine its validity and compliance.
For the given JSON of an expense, check the following:
- Is the amount reasonable?
- Is the description clear?
- Is the category appropriate?
Based on your analysis, provide a "confidence_score" between 0.0 and 1.0 and a brief "audit_notes" string.
Respond ONLY with a valid JSON object.
Example Input JSON:
{
"amount": 25.50,
"currency": "EUR",
"description": "Lunch with colleagues",
"date": "2025-12-18",
"category": "Food"
}
Example Output JSON:
{
"confidence_score": 0.95,
"audit_notes": "The expense seems valid and well-categorized."
}
"""