68 lines
1.8 KiB
Python
68 lines
1.8 KiB
Python
from pydantic_ai import Agent
|
|
from pydantic_ai.models.ollama import OllamaModel
|
|
from pydantic_ai.providers.ollama import OllamaProvider
|
|
from dotenv import load_dotenv
|
|
import os
|
|
import json
|
|
from prompts import OPPORTUNITY_PROMPT, EVENT_PROMPT
|
|
|
|
# Load variables from a .env file (if one is found) before reading any
# configuration from the environment.
load_dotenv()

# Ollama server address; None when OLLAMA_BASE_URL is not set.
ollama_url = os.environ.get("OLLAMA_BASE_URL")
prov = OllamaProvider(base_url=ollama_url)

# Use qwen2.5:3b or phi4-mini for low-end hardware (RAM < 8GB)
model = OllamaModel(model_name="granite4.1:8b", provider=prov)
|
|
|
|
def _build_agent(system_prompt):
    """Create an Agent on the shared model that returns plain-text output."""
    return Agent(
        model,
        output_type=str,
        system_prompt=system_prompt,
        retries=5,
    )


# --- OPPORTUNITY AGENT ---
opportunity_agent = _build_agent(OPPORTUNITY_PROMPT)

# --- EVENT AGENT ---
event_agent = _build_agent(EVENT_PROMPT)
|
|
|
|
def _json_candidates(raw_text: str):
    """Yield increasingly aggressive guesses at the JSON payload in *raw_text*."""
    stripped = raw_text.strip()

    # 1. The reply may already be bare JSON. Try it untouched so that
    #    backticks inside string values are not altered.
    yield stripped

    # 2. Legacy clean-up: drop markdown fence markers wherever they occur.
    #    Kept so every reply that parsed before still parses the same way.
    yield raw_text.replace("```json", "").replace("```", "").strip()

    # 3. Replies with prose or an unexpected fence tag around the payload:
    #    keep only the outermost {...} span, then the outermost [...] span.
    for opener, closer in (("{", "}"), ("[", "]")):
        start = stripped.find(opener)
        end = stripped.rfind(closer)
        if start != -1 and end > start:
            yield stripped[start:end + 1]


def _loads_lenient(raw_text: str):
    """Decode the first candidate from *raw_text* that is valid JSON.

    Raises:
        json.JSONDecodeError: If no candidate parses. The error raised is the
            one produced for the untouched (whitespace-stripped) reply.
    """
    first_error = None
    for candidate in _json_candidates(raw_text):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as err:
            if first_error is None:
                first_error = err
    # _json_candidates always yields at least two candidates, so an error
    # has been recorded whenever this line is reached.
    raise first_error


async def parse_page(content: str, entry_type: str = "opportunity"):
    """Run the matching agent on *content* and decode its reply as JSON.

    Args:
        content: The raw text content to parse.
        entry_type: 'opportunity' selects the opportunity agent; any other
            value (normally 'event') selects the event agent.

    Returns:
        The value decoded from the agent's JSON reply. Callers expect a
        dictionary; that depends on the prompt, which is not enforced here.

    Raises:
        json.JSONDecodeError: If no JSON payload can be decoded from the
            agent's reply.
    """
    # Select the appropriate agent
    agent = opportunity_agent if entry_type == "opportunity" else event_agent

    # 1. Run the agent (which returns a string)
    print(f"Parsing {entry_type}...")
    result = await agent.run(content)
    raw_text = result.output

    # 2. Decode the reply, tolerating markdown fences and surrounding prose
    try:
        return _loads_lenient(raw_text)
    except json.JSONDecodeError:
        print(f"Critical Error: The AI sent invalid JSON. Text was: {raw_text}")
        # Bare raise keeps the original traceback intact.
        raise
|