mirror of
https://github.com/marcogll/telegram_expenses_controller.git
synced 2026-01-13 13:25:15 +00:00
feat: Implement core application structure, AI extraction, persistence, and Telegram bot modules with updated configuration and dependencies.
This commit is contained in:
0
app/ai/__init__.py
Normal file
0
app/ai/__init__.py
Normal file
42
app/ai/classifier.py
Normal file
42
app/ai/classifier.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""
|
||||
AI-powered classification and confidence scoring.
|
||||
"""
|
||||
import openai
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
|
||||
from app.config import config
|
||||
from app.ai.prompts import AUDITOR_PROMPT
|
||||
from app.schema.base import ProvisionalExpense
|
||||
|
||||
# Module-level side effect: importing this module assigns the API key from
# the application config onto the global `openai` module.
|
||||
openai.api_key = config.OPENAI_API_KEY
|
||||
|
||||
# Logger named after this module, used by classify_and_audit below.
logger = logging.getLogger(__name__)
|
||||
|
||||
def classify_and_audit(expense: ProvisionalExpense) -> ProvisionalExpense:
    """
    Audit an extracted expense and stamp it with a confidence score.

    Placeholder implementation: no AI model is called yet. Every expense
    receives the same fixed confidence score, one explanatory validation
    note, and is marked as having been processed by AI inference.

    Args:
        expense: A ProvisionalExpense object with extracted data. It is
            modified in place.

    Returns:
        The same ProvisionalExpense object, updated with the audit findings.
    """
    description = expense.extracted_data.description
    logger.info(f"Starting AI audit for expense: {description}")

    # Fixed stand-in values until a real model call (like the one in the
    # extractor) is wired in.
    placeholder_score = 0.95
    placeholder_note = "AI audit placeholder: auto-approved."

    expense.confidence_score = placeholder_score
    expense.validation_notes.append(placeholder_note)
    expense.processing_method = "ai_inference"  # Assume AI was used

    logger.info("AI audit placeholder complete.")
    return expense
|
||||
16
app/ai/confidence.py
Normal file
16
app/ai/confidence.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
Functions for calculating confidence scores.
|
||||
"""
|
||||
|
||||
def calculate_confidence(extracted_data: dict) -> float:
    """
    Calculate a confidence score from the completeness of the extracted data.

    Starts from a perfect score of 1.0 and subtracts a fixed penalty for each
    key field that is missing or empty. Stub heuristic.

    Args:
        extracted_data: Mapping of extracted expense fields. ``None`` or an
            empty mapping is treated as "nothing was extracted" instead of
            raising.

    Returns:
        A score between 0.0 and 1.0, rounded to two decimals so that float
        subtraction artefacts (e.g. 0.19999999999999996) never reach callers.
    """
    fields = extracted_data or {}

    penalty = 0.0

    # A missing, None or zero amount counts as "no amount".
    if not fields.get("amount"):
        penalty += 0.5

    # A whitespace-only description carries no information, so it is
    # treated the same as a missing one.
    description = fields.get("description")
    if isinstance(description, str):
        description = description.strip()
    if not description:
        penalty += 0.3

    return round(max(0.0, 1.0 - penalty), 2)
|
||||
60
app/ai/extractor.py
Normal file
60
app/ai/extractor.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
AI-powered data extraction from raw text.
|
||||
"""
|
||||
import openai
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
|
||||
from app.config import config
|
||||
from app.ai.prompts import EXTRACTOR_PROMPT
|
||||
from app.schema.base import ExtractedExpense
|
||||
|
||||
# Module-level side effect: importing this module assigns the API key from
# the application config onto the global `openai` module.
|
||||
openai.api_key = config.OPENAI_API_KEY
|
||||
|
||||
# Logger named after this module, used by extract_expense_data below.
logger = logging.getLogger(__name__)
|
||||
|
||||
def extract_expense_data(text: str) -> ExtractedExpense:
    """
    Uses an AI model to extract structured expense data from a raw text string.

    The call is best-effort: any failure (API error, malformed JSON, a JSON
    payload that is not an object, or a payload the model class rejects) is
    logged and a model containing only the raw text is returned, so the
    entry can still be reviewed manually.

    NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 SDK interface;
    confirm the pinned ``openai`` version still provides it.

    Args:
        text: The raw text from user input, OCR, or transcription.

    Returns:
        An ExtractedExpense object with the data found by the AI, or one
        holding only ``raw_text`` when extraction failed.
    """
    from datetime import date  # local import keeps this block self-contained

    logger.info(f"Starting AI extraction for text: '{text[:100]}...'")

    # EXTRACTOR_PROMPT tells the model to fall back to "today's date", but
    # the model has no clock. State the date explicitly so the fallback is
    # correct instead of guessed.
    system_prompt = (
        f"{EXTRACTOR_PROMPT}\nToday's date is {date.today().isoformat()}."
    )

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Or another suitable model
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ],
            temperature=0.0,
            response_format={"type": "json_object"},
        )

        # The response from OpenAI should be a JSON string in the message content
        json_response = response.choices[0].message['content']
        extracted_data = json.loads(json_response)

        # Valid JSON is not necessarily an object (it could be a list or a
        # scalar); only a dict can be expanded into the model's fields.
        if not isinstance(extracted_data, dict):
            logger.error(
                "AI response was valid JSON but not an object: %r",
                extracted_data,
            )
            return ExtractedExpense(raw_text=text)

        logger.info(f"AI extraction successful. Raw JSON: {extracted_data}")

        # Add the original text to the model for audit purposes
        extracted_data['raw_text'] = text

        return ExtractedExpense(**extracted_data)

    except json.JSONDecodeError as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Failed to decode JSON from AI response: {e}")
        # Return a model with only the raw text for manual review
        return ExtractedExpense(raw_text=text)
    except Exception as e:
        # Deliberate catch-all boundary: extraction must never crash the
        # caller, so every other failure degrades to a raw-text-only model.
        logger.exception(f"An unexpected error occurred during AI extraction: {e}")
        # Return a model with only the raw text
        return ExtractedExpense(raw_text=text)
|
||||
|
||||
56
app/ai/prompts.py
Normal file
56
app/ai/prompts.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""
|
||||
Version-controlled prompts for AI agents.
|
||||
"""
|
||||
|
||||
# Prompt for the "Extractor" AI agent, which pulls structured data from raw text.
# It asks for a JSON object with five keys: "amount", "currency",
# "description", "date" and "category", and nothing else.
# NOTE(review): the text tells the model to default to "today's date" but
# does not itself contain the date; whoever sends this prompt must supply it.
|
||||
EXTRACTOR_PROMPT = """
|
||||
You are a highly specialized AI assistant for expense tracking. Your task is to extract structured information from a given text. The text is a user's expense entry.
|
||||
|
||||
From the text, extract the following fields:
|
||||
- "amount": The numeric value of the expense.
|
||||
- "currency": The currency code (e.g., USD, EUR, CLP). If not specified, assume 'EUR'.
|
||||
- "description": A brief description of what the expense was for.
|
||||
- "date": The date of the expense in YYYY-MM-DD format. If not specified, use today's date.
|
||||
- "category": The category of the expense (e.g., Food, Transport, Shopping, Rent, Utilities). If you cannot determine it, use 'Other'.
|
||||
|
||||
Respond ONLY with a valid JSON object containing these fields. Do not add any explanation or conversational text.
|
||||
|
||||
Example Text: "lunch with colleagues today, 25.50 eur"
|
||||
Example JSON:
|
||||
{
|
||||
"amount": 25.50,
|
||||
"currency": "EUR",
|
||||
"description": "Lunch with colleagues",
|
||||
"date": "2025-12-18",
|
||||
"category": "Food"
|
||||
}
|
||||
"""
|
||||
|
||||
# Prompt for a "Classifier" or "Auditor" agent, which could validate the extraction.
|
||||
# This is a placeholder for a potential future agent: app/ai/classifier.py
# imports it, but classify_and_audit does not send it to any model yet.
# The prompt asks for a JSON object with "confidence_score" (0.0-1.0) and
# "audit_notes".
# NOTE(review): classify_and_audit writes to `validation_notes`, not
# `audit_notes` - confirm the intended key before wiring this prompt in.
|
||||
AUDITOR_PROMPT = """
|
||||
You are an auditing AI. Your task is to review an expense record and determine its validity and compliance.
|
||||
For the given JSON of an expense, check the following:
|
||||
- Is the amount reasonable?
|
||||
- Is the description clear?
|
||||
- Is the category appropriate?
|
||||
|
||||
Based on your analysis, provide a "confidence_score" between 0.0 and 1.0 and a brief "audit_notes" string.
|
||||
|
||||
Respond ONLY with a valid JSON object.
|
||||
|
||||
Example Input JSON:
|
||||
{
|
||||
"amount": 25.50,
|
||||
"currency": "EUR",
|
||||
"description": "Lunch with colleagues",
|
||||
"date": "2025-12-18",
|
||||
"category": "Food"
|
||||
}
|
||||
|
||||
Example Output JSON:
|
||||
{
|
||||
"confidence_score": 0.95,
|
||||
"audit_notes": "The expense seems valid and well-categorized."
|
||||
}
|
||||
"""
|
||||
Reference in New Issue
Block a user