From 54000adeaadc2d52a5aad6e1f9d0beebec4ebbaf Mon Sep 17 00:00:00 2001 From: Cailean Finn Date: Sun, 10 May 2026 13:39:24 +0100 Subject: [PATCH] removed bot logging messages --- __pycache__/database.cpython-312.pyc | Bin 4174 -> 4289 bytes __pycache__/scraper.cpython-312.pyc | Bin 1484 -> 1484 bytes bot.py | 30 ++++++++++++++++++++++++--- database.py | 15 +++++++++----- scraper.py | 2 +- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/__pycache__/database.cpython-312.pyc b/__pycache__/database.cpython-312.pyc index db655c24b54a923747b45721b4789da88ba44fe0..f9c3d3c94d62e17e5229e8e64f8688c3cdf72109 100644 GIT binary patch delta 1393 zcmb7DO>7%Q6n?Wm-u2qM>z{QK7t%BhX|PC>rb?triF-(u(xz%bY9kbv;5r^Bwd=&z zdP6X5Xj)ZBa4U0YP}M_kRJlZ|1m@NsMMUC98=8vBYC#;ha3P{aa6p2YjW;x+7oIdT zZ@>NCoB7_G`F8i6Htj1_Re;()8wxi=SF~m%T`3x14XZb4H%t&8KA=1P(fAB50=LS^974BUBkQ$*8mHaNPa~fNFp(= zm7M0HjOedpP7lFM-)J z&r|djzC-fQ3${>*-Fk!n7=;6Mt#Yn3m&Un?*{Sq&Ws3YFyoZ{}q==8S(r}iw@+X}Z zU_${pwkUolwcHqgs7E)1Cee5th(8L39pk`qa=GVr{OdmF_3``1-*QIMPWr49$*cz_ zNYGb|dAwvTUpupkoXE&}@HqL%H@q(~)CYI_{D-^vyUC8jo&3E{k^0%KLeZ@yI@}U( zkiUF!5%X~XVo2Q=ghBep+Uf=tZSERe=aw1>0zcRY`&d8B53ppz+hF$74oNft(0Yh; zam~ZLNct_a+zd~G|rc+Ra`4< zHP%A_u;y)EP-(Lh7j#?L9&Zb$0Eb)=2k4$5SA@7tgo}B37{@Kp^N%04#m4E~D0cem zq8E#Rd?TwU;wp*$PbG$zWm8G~xk}o>(#W(wej;Z3w(G<@a8ps9mu#sJ{kJ}zx72VD zTmsxQ?|a`jhvwiNehlVNqM6+H|Lqg4yj9HIT2p>TrxY_x1L^|KXDyRmXp=pPI#-^k zE7?-9n9ExE(oEf#HFFs&XErfLGn-g-6EY3VSed$#o3YGl+Nzdw<{l!bU85}?vBJ8# zyJ6-o`cV(gmW!nfPP;*7FNM14sC-VZX8kZ_I delta 1149 zcmZ`&O>7fK6rR~X@24LlvnK8rtMyAujaLD@~Akqi@|TN|5@bnR)Z(dvCt? z?d+4*XD#|~nx+!8my=}tAOB6gk&>I6-vq9c5zX)#>UBDzmuS)lJLuK^40VvJCgaNE zSpF5GKN6c5^n5!ZMb0KgTH+L9y+a$kP3{mwu=rbCf{M__HRC+MHA9LE!bL9VRotrM zD*V93*X`%IIMv`U%%KqC<8``=-9*goW!|E2gim}2rpwd$%J96kaO{u6BW&OlJM|UQzh0D2!#*ijU&HLFBr8pi z`<6LR}QxDb6tspP5duSBKjFe2iH94)O7hBc6g$2!n5@<--EF= zatJnehur06Rt2I&AvQHcM?Uk4A=%^tc7}pVQOv^*MYpyab2D-8acVp9eQR%!Z5{;x;{u;kdtnG&< za&V{LVd<82qVIJZu*Fp_HRL-gb#Bf&D)s+QCHNkdkPs)94}Mmnv5+mTZ@_3U!Z}ab zV|&Ky;EmE;7cNWROH*`;T;zwz6ir4Tp}w8%@cv1UD`o&Ys0zkd#xkAfQsoMxRW*~( z<+2%TEI(0|GG;byWz8@inh|Ge#3?CbPTERW)$D|2md;qELe^}7QLS_E0~c6yev_6} zGh?`^`X&pxeA+luOER}(r~^mw0ltPSWTl>nD)Vi;ed%^?dRrw_k(j!FQ8)mvv?NV{ zrgt~L2sE(9u7@2DW0h|nf4;EgdFmuPG{lC!VBs&90>f}lH{&(TZr1$?+x_W68yh^y zhR!}8Ota5M*=UA^jio>q-s;!7X8VOhyU1L3Uk@(xyOdNLKi?Zqwen9}Mf8(g)tfIA z@}@O8F=myHC;jlq-%B^cYk%h!N2wb9`(zni@?Upy&$oGFGMDW&KgO8z!i(qNXyB~> RZy`oQE4?34-Q0`Oe*to-0|Wp7 diff --git a/__pycache__/scraper.cpython-312.pyc b/__pycache__/scraper.cpython-312.pyc index 0995313c97fd066a5396421ff345a7eca7133e34..f99504b171f73b47048f367c7d7330c6875a48a9 100644 GIT binary patch delta 22 ccmX@ZeTJL&G%qg~0}xnOGh`WT str: + # Proactor is required for subprocesses (Playwright) on Windows + if sys.platform == 'win32': + asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy()) + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(_get_clean_content(url)) + finally: + loop.close() + + +async def get_clean_content(url: str) -> str: + loop = asyncio.get_event_loop() + with concurrent.futures.ThreadPoolExecutor() as pool: + result = await loop.run_in_executor(pool, _run_scraper_in_thread, url) + return result load_dotenv() logging.getLogger("httpx").setLevel(logging.WARNING) @@ -295,6 +319,6 @@ async def _main(): if __name__ == '__main__': import sys - if sys.platform == 'win32': - asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + # if sys.platform == 'win32': + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) asyncio.run(_main()) \ No newline at end of file diff --git a/database.py b/database.py index 58576f8..558d56c 100644 --- a/database.py +++ b/database.py @@ -8,6 +8,7 @@ load_dotenv() pb = PocketBase(os.getenv('POCKETBASE_URL')) admin_data = pb.admins.auth_with_password(os.getenv('POCKETBASE_ADMIN_EMAIL'), os.getenv('POCKETBASE_ADMIN_PASSWORD')) +show_debug_msg = False def convert_datetime_to_pocketbase(date_time_str): """ @@ -18,7 +19,8 @@ def convert_datetime_to_pocketbase(date_time_str): return None try: - print(f"[DEBUG] Converting datetime: '{date_time_str}' (type: {type(date_time_str)})") + if show_debug_msg: + print(f"[DEBUG] Converting datetime: '{date_time_str}' (type: {type(date_time_str)})") # Parse the input format: "DD-MM-YYYY HH:MM" or "DD-MM-YYYY (HH:MM)" date_time_str = date_time_str.replace("(", "").replace(")", "").strip() @@ -32,7 +34,8 @@ def convert_datetime_to_pocketbase(date_time_str): # Convert to PocketBase local datetime format: YYYY-MM-DD HH:MM:SS pb_format = dt.strftime("%Y-%m-%d %H:%M:%S") - print(f"[DEBUG] Converted to PocketBase format: '{pb_format}'") + if show_debug_msg: + print(f"[DEBUG] Converted to PocketBase format: '{pb_format}'") return pb_format except Exception as e: print(f"[ERROR] Error converting datetime '{date_time_str}': {e}") @@ -49,7 +52,7 @@ def upload_entry(data, entry_type='opportunity', url=None): entry_type: 'opportunity' or 'event' url: The source URL of the entry """ - print(f"[DEBUG] Uploading {entry_type} entry. Data: {data}") + print(f"[DEBUG] Uploading {entry_type} entry. Data: {data["title"]}") data = dict(data) # Add URL to data if provided @@ -66,7 +69,8 @@ def upload_entry(data, entry_type='opportunity', url=None): data['datetime'] = convert_datetime_to_pocketbase(data['date_time']) # Remove the original field since PocketBase expects 'datetime' del data['date_time'] - print(f"[DEBUG] Event datetime: '{original_dt}' -> '{data['datetime']}'") + if show_debug_msg: + print(f"[DEBUG] Event datetime: '{original_dt}' -> '{data['datetime']}'") else: print(f"[WARNING] No 'date_time' field found in event data") @@ -81,7 +85,8 @@ def upload_entry(data, entry_type='opportunity', url=None): original_deadline = data['deadline'] # Convert deadline to PocketBase datetime format data['deadline'] = convert_datetime_to_pocketbase(data['deadline']) - print(f"[DEBUG] Opportunity deadline: '{original_deadline}' -> '{data['deadline']}'") + if show_debug_msg: + print(f"[DEBUG] Opportunity deadline: '{original_deadline}' -> '{data['deadline']}'") else: print(f"[WARNING] No 'deadline' field found in opportunity data") diff --git a/scraper.py b/scraper.py index 69a026e..76801b2 100644 --- a/scraper.py +++ b/scraper.py @@ -23,7 +23,7 @@ async def get_clean_content(url: str): js_code="window.scrollTo(0, document.body.scrollHeight);", magic=True ) - + async with AsyncWebCrawler(config=browser_conf) as crawler: result = await crawler.arun(url=url, config=run_conf) return result.markdown \ No newline at end of file