removed bot logging messages
This commit is contained in:
Binary file not shown.
Binary file not shown.
30
bot.py
30
bot.py
@@ -1,5 +1,8 @@
|
||||
import os
|
||||
import asyncio
|
||||
import sys
|
||||
import concurrent.futures
|
||||
|
||||
import logging
|
||||
from contextlib import suppress
|
||||
from dotenv import load_dotenv
|
||||
@@ -10,7 +13,28 @@ from telegram.ext import ApplicationBuilder, CommandHandler, MessageHandler, fil
|
||||
# Import your existing logic
|
||||
from agent import parse_page
|
||||
from database import upload_entry
|
||||
from scraper import get_clean_content
|
||||
from scraper import get_clean_content as _get_clean_content
|
||||
|
||||
|
||||
# Run the scraper in a separate thread with its own event loop to avoid
|
||||
# Windows Selector vs Proactor event loop conflicts between PTB and Playwright.
|
||||
def _run_scraper_in_thread(url: str) -> str:
    """Run the async scraper to completion on a private event loop.

    Meant to be executed in a worker thread: it creates a fresh event loop,
    installs it as the current loop for this thread, drives
    ``_get_clean_content(url)`` until it finishes, and always closes the loop.

    Args:
        url: Address of the page passed through to the scraper.

    Returns:
        Whatever ``_get_clean_content(url)`` returns.
    """
    # Proactor is required for subprocesses (Playwright) on Windows.
    # Build the loop directly instead of calling set_event_loop_policy():
    # the policy is process-wide, so changing it here would also replace the
    # policy chosen by the main thread.
    if sys.platform == 'win32':
        loop = asyncio.ProactorEventLoop()
    else:
        loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(_get_clean_content(url))
    finally:
        # Release the loop's resources even if the scraper raised.
        loop.close()
|
||||
|
||||
|
||||
async def get_clean_content(url: str) -> str:
    """Scrape *url* without running the scraper on the caller's event loop.

    The work is handed to ``_run_scraper_in_thread`` on a thread of a
    short-lived ``ThreadPoolExecutor``; this coroutine awaits the result.

    Args:
        url: Address of the page to scrape.

    Returns:
        The value produced by ``_run_scraper_in_thread(url)``.
    """
    # Inside a coroutine the running loop is the one to use;
    # get_event_loop() is the legacy spelling and is discouraged here.
    loop = asyncio.get_running_loop()
    # The executor is torn down when the block exits, i.e. after the
    # scraper thread has delivered its result (or raised).
    with concurrent.futures.ThreadPoolExecutor() as pool:
        return await loop.run_in_executor(pool, _run_scraper_in_thread, url)
|
||||
|
||||
load_dotenv()
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
@@ -295,6 +319,6 @@ async def _main():
|
||||
|
||||
if __name__ == '__main__':
    import sys

    # WindowsSelectorEventLoopPolicy only exists on Windows; keep the platform
    # guard so the script does not fail with AttributeError elsewhere.
    if sys.platform == 'win32':
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.run(_main())
|
||||
15
database.py
15
database.py
@@ -8,6 +8,7 @@ load_dotenv()
|
||||
|
||||
pb = PocketBase(os.getenv('POCKETBASE_URL'))
|
||||
admin_data = pb.admins.auth_with_password(os.getenv('POCKETBASE_ADMIN_EMAIL'), os.getenv('POCKETBASE_ADMIN_PASSWORD'))
|
||||
show_debug_msg = False
|
||||
|
||||
def convert_datetime_to_pocketbase(date_time_str):
|
||||
"""
|
||||
@@ -18,7 +19,8 @@ def convert_datetime_to_pocketbase(date_time_str):
|
||||
return None
|
||||
|
||||
try:
|
||||
print(f"[DEBUG] Converting datetime: '{date_time_str}' (type: {type(date_time_str)})")
|
||||
if show_debug_msg:
|
||||
print(f"[DEBUG] Converting datetime: '{date_time_str}' (type: {type(date_time_str)})")
|
||||
|
||||
# Parse the input format: "DD-MM-YYYY HH:MM" or "DD-MM-YYYY (HH:MM)"
|
||||
date_time_str = date_time_str.replace("(", "").replace(")", "").strip()
|
||||
@@ -32,7 +34,8 @@ def convert_datetime_to_pocketbase(date_time_str):
|
||||
|
||||
# Convert to PocketBase local datetime format: YYYY-MM-DD HH:MM:SS
|
||||
pb_format = dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
print(f"[DEBUG] Converted to PocketBase format: '{pb_format}'")
|
||||
if show_debug_msg:
|
||||
print(f"[DEBUG] Converted to PocketBase format: '{pb_format}'")
|
||||
return pb_format
|
||||
except Exception as e:
|
||||
print(f"[ERROR] Error converting datetime '{date_time_str}': {e}")
|
||||
@@ -49,7 +52,7 @@ def upload_entry(data, entry_type='opportunity', url=None):
|
||||
entry_type: 'opportunity' or 'event'
|
||||
url: The source URL of the entry
|
||||
"""
|
||||
print(f"[DEBUG] Uploading {entry_type} entry. Data: {data}")
|
||||
print(f"[DEBUG] Uploading {entry_type} entry. Data: {data["title"]}")
|
||||
data = dict(data)
|
||||
|
||||
# Add URL to data if provided
|
||||
@@ -66,7 +69,8 @@ def upload_entry(data, entry_type='opportunity', url=None):
|
||||
data['datetime'] = convert_datetime_to_pocketbase(data['date_time'])
|
||||
# Remove the original field since PocketBase expects 'datetime'
|
||||
del data['date_time']
|
||||
print(f"[DEBUG] Event datetime: '{original_dt}' -> '{data['datetime']}'")
|
||||
if show_debug_msg:
|
||||
print(f"[DEBUG] Event datetime: '{original_dt}' -> '{data['datetime']}'")
|
||||
else:
|
||||
print(f"[WARNING] No 'date_time' field found in event data")
|
||||
|
||||
@@ -81,7 +85,8 @@ def upload_entry(data, entry_type='opportunity', url=None):
|
||||
original_deadline = data['deadline']
|
||||
# Convert deadline to PocketBase datetime format
|
||||
data['deadline'] = convert_datetime_to_pocketbase(data['deadline'])
|
||||
print(f"[DEBUG] Opportunity deadline: '{original_deadline}' -> '{data['deadline']}'")
|
||||
if show_debug_msg:
|
||||
print(f"[DEBUG] Opportunity deadline: '{original_deadline}' -> '{data['deadline']}'")
|
||||
else:
|
||||
print(f"[WARNING] No 'deadline' field found in opportunity data")
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ async def get_clean_content(url: str):
|
||||
js_code="window.scrollTo(0, document.body.scrollHeight);",
|
||||
magic=True
|
||||
)
|
||||
|
||||
|
||||
async with AsyncWebCrawler(config=browser_conf) as crawler:
|
||||
result = await crawler.arun(url=url, config=run_conf)
|
||||
return result.markdown
|
||||
Reference in New Issue
Block a user