mirror of
https://github.com/marcogll/telegram_expenses_controller.git
synced 2026-01-13 13:25:15 +00:00
Merge pull request #3 from marcogll/feat-initial-project-structure-14463650289909895681
Implement Input Handler and Data Models
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional, List
|
||||
from datetime import datetime, date
|
||||
|
||||
class RawInput(BaseModel):
    """
    Unprocessed payload as delivered by the input source (e.g., n8n).
    """

    # Required; no default.
    user_id: str
    # Required; exchanged under the external key "type" through the alias.
    input_type: str = Field(
        default=...,
        alias="type",
        description="The type of input, e.g., 'text', 'voice', 'image', 'pdf'",
    )
    # Required; the payload itself, always carried as a string.
    data: str
|
||||
|
||||
class ExtractedExpense(BaseModel):
    """
    Expense data right after the first extraction pass (e.g., OCR or transcription).

    Extraction may be imperfect, so every field except ``raw_text`` has a default.
    """

    provider_name: Optional[str] = Field(default=None)
    amount: Optional[float] = Field(default=None)
    # Currency falls back to "MXN" when extraction does not supply one.
    currency: Optional[str] = Field(default="MXN")
    expense_date: Optional[date] = Field(default=None)
    description: Optional[str] = Field(default=None)
    # The only required field.
    raw_text: str
|
||||
|
||||
class ProvisionalExpense(BaseModel):
    """
    A processed expense that the user has not confirmed yet.

    This is the state an expense is in before the user validates the data.
    """

    user_id: str
    extracted_data: ExtractedExpense

    # --- Classified fields ---
    category: Optional[str] = Field(default="Por Determinar")
    subcategory: Optional[str] = Field(default=None)
    # Exchanged under the external key "tipo_gasto_default" through the alias.
    expense_type: Optional[str] = Field(
        default=None,
        alias="tipo_gasto_default",
        description="e.g., 'personal' or 'negocio'",
    )

    # --- Metadata ---
    confidence_score: float
    processing_method: str = Field(
        default=...,
        description="How the expense was classified, e.g., 'provider_match', 'keyword_match', 'ai_inference'",
    )
    validation_notes: List[str] = []
    status: str = "AWAITING_CONFIRMATION"
    # Filled in by calling datetime.now when no value is supplied.
    timestamp: datetime = Field(default_factory=datetime.now)
|
||||
|
||||
class FinalExpense(BaseModel):
    """
    A final expense record that the user has confirmed.

    This is the data that will be stored permanently.
    """

    # --- Core expense data ---
    user_id: str
    provider_name: str
    amount: float
    currency: str
    expense_date: date
    description: Optional[str] = Field(default=None)

    # --- Classification ---
    category: str
    subcategory: Optional[str] = Field(default=None)
    expense_type: str

    # --- Audit trail ---
    initial_processing_method: str
    confirmed_by: str
    # Filled in by calling datetime.now when no value is supplied.
    confirmed_at: datetime = Field(default_factory=datetime.now)
    audit_log: List[str] = []

    status: str = "CONFIRMED"
|
||||
|
||||
23
src/main.py
23
src/main.py
@@ -1,4 +1,6 @@
|
||||
from fastapi import FastAPI, Request
|
||||
from src.data_models import RawInput
|
||||
from src.modules.input_handler import handle_input
|
||||
import logging
|
||||
|
||||
# Configure logging
|
||||
@@ -12,10 +14,21 @@ async def root():
|
||||
return {"message": "Expense Tracker API is running."}
|
||||
|
||||
@app.post("/process-expense")
|
||||
async def process_expense(request: Request):
|
||||
async def process_expense(raw_input: RawInput):
|
||||
"""
|
||||
Receives expense data from n8n, logs it, and returns a confirmation.
|
||||
Receives raw expense data, processes it using the input handler,
|
||||
and returns the normalized text.
|
||||
"""
|
||||
payload = await request.json()
|
||||
logger.info(f"Received expense data: {payload}")
|
||||
return {"status": "received", "data": payload}
|
||||
logger.info(f"Received raw input: {raw_input.dict()}")
|
||||
|
||||
# Convert RawInput to a dictionary suitable for handle_input
|
||||
input_data = raw_input.dict(by_alias=True)
|
||||
|
||||
# Process the input to get normalized text
|
||||
normalized_text = handle_input(input_data)
|
||||
|
||||
logger.info(f"Normalized text: '{normalized_text}'")
|
||||
|
||||
# For now, just return the processed text.
|
||||
# In the future, this will trigger the analysis phase.
|
||||
return {"status": "processed", "normalized_text": normalized_text}
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
import logging
|
||||
|
||||
# Configure logging: set the logging level to INFO through basicConfig and
# create the module-level logger, named after this module, that the input
# processors below write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
||||
|
||||
def normalize_text(text: str) -> str:
    """
    Return ``text`` in lowercase with leading and trailing whitespace removed.
    """
    lowered = text.lower()
    return lowered.strip()
|
||||
|
||||
def process_voice_input(voice_data: bytes) -> str:
    """
    Stub for voice input handling.

    Logs that it was invoked and returns an empty string; ``voice_data`` is
    not inspected yet. Transcription is planned to be added here.
    """
    # Placeholder result until a transcription service is called from here.
    transcript = ""
    logger.info("Processing voice input (stub).")
    return transcript
|
||||
|
||||
def process_image_input(image_data: bytes) -> str:
    """
    Stub for image input handling.

    Logs that it was invoked and returns an empty string; ``image_data`` is
    not inspected yet. OCR is planned to be added here.
    """
    # Placeholder result until an OCR service is called from here.
    recognized_text = ""
    logger.info("Processing image input (stub).")
    return recognized_text
|
||||
|
||||
def process_pdf_input(pdf_data: bytes) -> str:
    """
    Stub for PDF input handling.

    Logs that it was invoked and returns an empty string; ``pdf_data`` is
    not inspected yet. PDF text extraction is planned to be added here.
    """
    # Placeholder result until a PDF extraction library is called from here.
    extracted_text = ""
    logger.info("Processing PDF input (stub).")
    return extracted_text
|
||||
|
||||
def handle_input(input_data: dict) -> str:
    """
    Route an input payload to the processor for its type and return the text.

    Args:
        input_data: Mapping with an optional "type" key (treated as "text"
            when absent) and an optional "data" key holding the payload
            (treated as an empty string when absent or None).

    Returns:
        The normalized text for "text" input, the result of the matching
        processor for "voice", "image" and "pdf" input, or an empty string
        when the type is not supported.
    """
    input_type = input_data.get("type", "text")
    data = input_data.get("data", "")
    # dict.get only applies its default when the key is missing; an explicit
    # None (e.g. a JSON null) would otherwise reach normalize_text and fail
    # with AttributeError when .lower() is called on it.
    if data is None:
        data = ""

    if input_type == "text":
        return normalize_text(data)
    if input_type == "voice":
        # Assuming data is base64 encoded or a direct byte stream in a real scenario
        return process_voice_input(data)
    if input_type == "image":
        return process_image_input(data)
    if input_type == "pdf":
        return process_pdf_input(data)

    # Anything else is reported and yields no text rather than raising.
    logger.warning("Unsupported input type: %s", input_type)
    return ""
|
||||
|
||||
12
tasks.md
12
tasks.md
@@ -23,10 +23,10 @@ Principio rector:
|
||||
- **NOTA:** Se ha modificado el enfoque. En lugar de un webhook directo de Telegram, se utiliza **n8n** para manejar la recepción de datos. La aplicación expone un endpoint genérico `/process-expense` para este propósito.
|
||||
- [x] Endpoint `/process-expense` implementado en FastAPI.
|
||||
- [x] El endpoint recibe y loguea el payload.
|
||||
- [ ] **1.4 Input Handler**
|
||||
- [ ] Implementar `input_handler.py`.
|
||||
- [ ] Normalizar texto.
|
||||
- [ ] Implementar stubs para voz, imagen y PDF.
|
||||
- [x] **1.4 Input Handler**
|
||||
- [x] Implementar `input_handler.py`.
|
||||
- [x] Normalizar texto.
|
||||
- [x] Implementar stubs para voz, imagen y PDF.
|
||||
|
||||
---
|
||||
|
||||
@@ -34,8 +34,8 @@ Principio rector:
|
||||
|
||||
**Objetivo:** Tener claridad absoluta sobre qué es un gasto y en qué estado vive.
|
||||
|
||||
- [ ] **2.1 Modelos Pydantic**
|
||||
- [ ] Crear modelos: `RawInput`, `ExtractedExpense`, `ProvisionalExpense`, `FinalExpense`.
|
||||
- [x] **2.1 Modelos Pydantic**
|
||||
- [x] Crear modelos: `RawInput`, `ExtractedExpense`, `ProvisionalExpense`, `FinalExpense`.
|
||||
- [ ] **2.2 Estados del Gasto**
|
||||
- [ ] Definir estados explícitos: `RECEIVED`, `ANALYZED`, `AWAITING_CONFIRMATION`, `CONFIRMED`, `CORRECTED`, `STORED`.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user