From 4726582379144c30e6f27796ed18c8311accbc6a Mon Sep 17 00:00:00 2001
From: Cailean Finn
Date: Sun, 10 May 2026 13:14:14 +0100
Subject: [PATCH] first commit

---
 .gitignore                           |   1 +
 README.md                            | 115 +++++++++++
 __pycache__/agent.cpython-312.pyc    | Bin 0 -> 2130 bytes
 __pycache__/database.cpython-312.pyc | Bin 0 -> 4174 bytes
 __pycache__/prompts.cpython-312.pyc  | Bin 0 -> 2291 bytes
 __pycache__/schemas.cpython-312.pyc  | Bin 0 -> 1659 bytes
 __pycache__/scraper.cpython-312.pyc  | Bin 0 -> 1484 bytes
 agent.py                             |  68 +++++++
 bot.py                               | 279 +++++++++++++++++++++++++++
 database.py                          |  97 ++++++++++
 prompts.py                           |  55 ++++++
 requirements.txt                     | 189 ++++++++++++++++++
 schemas.py                           |  21 ++
 scraper.py                           |  29 +++
 14 files changed, 854 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 __pycache__/agent.cpython-312.pyc
 create mode 100644 __pycache__/database.cpython-312.pyc
 create mode 100644 __pycache__/prompts.cpython-312.pyc
 create mode 100644 __pycache__/schemas.cpython-312.pyc
 create mode 100644 __pycache__/scraper.cpython-312.pyc
 create mode 100644 agent.py
 create mode 100644 bot.py
 create mode 100644 database.py
 create mode 100644 prompts.py
 create mode 100644 requirements.txt
 create mode 100644 schemas.py
 create mode 100644 scraper.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2eea525
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.env
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a7e9129
--- /dev/null
+++ b/README.md
@@ -0,0 +1,115 @@
+# null-bot
+
+> A small Telegram bot for extracting and saving opportunities and events from web pages or pasted text. It uses an LLM agent to parse content into structured JSON and stores entries in a local PocketBase instance. The bot is built entirely on open-source tools; the LLM of choice is IBM's granite4.1:8b, released under the Apache 2.0 license.
+
+## Features
+- Parses Opportunity (`/op`) and Event (`/ev`) entries from a URL or pasted text
+- Two entry types with separate system prompts and JSON schemas (externalized to `prompts.py`)
+- Follow-up prompt when users paste text: asks for a source URL only when saving
+- Converts dates/times to the PocketBase-friendly format (`YYYY-MM-DD HH:MM:SS`)
+- Retry decorator for robust LLM / network calls
+
+## Requirements
+- Python 3.11+ recommended
+- See `requirements.txt` for the full dependency list
+
+## Setup
+1. Clone the repo or copy the files to your machine.
+2. Create and activate a Python virtual environment:
+
+```bash
+python -m venv .venv
+# Windows
+.venv\Scripts\activate
+# macOS / Linux
+source .venv/bin/activate
+```
+
+3. Install dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+4. Environment variables
+
+- Create a `.env` file in the project root with at minimum:
+
+```
+TG_TOKEN=your_telegram_bot_token_here
+OLLAMA_BASE_URL=http://localhost:11434/v1
+ALLOWED_USERS=1234,5678
+POCKETBASE_URL=http://127.0.0.1:8090
+POCKETBASE_ADMIN_EMAIL=admin@example.com
+POCKETBASE_ADMIN_PASSWORD=secret
+```
+
+- Notes:
+  - `ALLOWED_USERS` should be a comma-separated list of Telegram user IDs (no brackets).
+  - The bot reads `TG_TOKEN` and `ALLOWED_USERS` from the environment.
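+
+5. PocketBase setup
+
+- The bot expects a running PocketBase instance at `POCKETBASE_URL` with two collections, `events` and `opportunities`. A minimal schema matching what `database.py` uploads (field names inferred from the code; adjust to your own setup) could look like:
+
+```text
+events:         title, org, summary, location (text), datetime (date), url (url)
+opportunities:  title, org, type, summary, location (text), deadline (date), url (url)
+```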
+
+6. Ollama (local LLM) setup
+
+- This project uses a local Ollama instance (or any compatible local LLM HTTP API) as the LLM provider. The bot expects an HTTP endpoint at `OLLAMA_BASE_URL` (default `http://localhost:11434/v1`).
+
+- Quick steps to get Ollama running locally:
+
+  1. Install Ollama for your platform: follow the official instructions at https://ollama.com/docs, or use the native installer for Windows/macOS/Linux.
+
+  2. Pull the model you want to use. Example (CLI):
+
+  ```bash
+  ollama pull granite4.1:8b
+  ```
+
+  3. Start the Ollama server so the bot can reach it (many installs also run it automatically as a background service):
+
+  ```bash
+  ollama serve
+  ```
+
+  4. Set `OLLAMA_BASE_URL` in your `.env` to point to the running API, for example:
+
+  ```text
+  OLLAMA_BASE_URL=http://localhost:11434/v1
+  ```
+
+  5. Verify the API is reachable (example curl; the model name assumes you pulled granite4.1:8b above):
+
+  ```bash
+  curl -s -X POST "${OLLAMA_BASE_URL}/completions" \
+    -H "Content-Type: application/json" \
+    -d '{"model":"granite4.1:8b","prompt":"hello","max_tokens":16}'
+  ```
+
+  A successful response indicates that the Ollama HTTP API is reachable and can serve model requests.
+
+- Notes and troubleshooting
+  - If your Ollama installation exposes a different port or path, update `OLLAMA_BASE_URL` accordingly.
+  - If you prefer hosted LLMs (OpenAI, Anthropic, Cohere, etc.), `agent.py` can be adapted to other providers; ensure the provider client is configured and the prompts in `prompts.py` are compatible.
+
+## Running the bot
+
+Start the bot with the project's entrypoint:
+
+```bash
+python bot.py
+```
+
+The bot listens for these commands:
+- `/op <url or text>` — parse an opportunity
+- `/ev <url or text>` — parse an event
+
+If you paste text instead of sending a URL, the bot will parse it, and when you click Save it will prompt you for a source URL (or you can `/skip`).
+
+## How it works (high-level)
+- `agent.py` uses `pydantic-ai` with a local LLM provider (e.g. Ollama) and the system prompts from `prompts.py` to parse pages/text into structured JSON.
+- `database.py` converts datetime fields and uploads the entry to the appropriate PocketBase collection (`events` or `opportunities`).
+- `bot.py` handles Telegram interactions, queues parse tasks, and keeps per-user state in `context.user_data`.
+
+## Troubleshooting
+- If dates show as `None` after saving: verify the PocketBase field names (`datetime` for events, `deadline` for opportunities) and ensure `.env` is configured.
+- If the bot doesn't start: check that `TG_TOKEN` is present and valid.
+- If parsing fails or you see unexpected behavior, check the console logs for the `convert_datetime_to_pocketbase()` and `upload_entry()` debug messages.
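+
+## Example: driving the pipeline from Python
+
+The Telegram layer is optional; the scrape → parse → upload pipeline can also be driven directly. A minimal sketch (assuming `.env` is configured, Ollama and PocketBase are running; the URL is a placeholder):
+
+```python
+import asyncio
+
+from scraper import get_clean_content
+from agent import parse_page
+from database import upload_entry
+
+async def main():
+    url = "https://example.com/open-call"      # placeholder URL
+    markdown = await get_clean_content(url)    # crawl the page into clean markdown
+    data = await parse_page(markdown, "opportunity")  # LLM -> dict
+    upload_entry(data, "opportunity", url)     # save to PocketBase
+
+asyncio.run(main())
+```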
diff --git a/__pycache__/agent.cpython-312.pyc b/__pycache__/agent.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2df0bf8803fda22d606bfbc27a5d0d41c39db667
GIT binary patch
(binary data omitted)
diff --git a/__pycache__/database.cpython-312.pyc b/__pycache__/database.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..db655c24b54a923747b45721b4789da88ba44fe0
GIT binary patch
(binary data omitted)
diff --git a/__pycache__/prompts.cpython-312.pyc b/__pycache__/prompts.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3230e721979819d8dab6923689d2561729f14fcc
GIT binary patch
(binary data omitted)
diff --git a/__pycache__/schemas.cpython-312.pyc b/__pycache__/schemas.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..58410ba66eceadebb453c3787c755186fa7fd132
GIT binary patch
(binary data omitted)
diff --git a/__pycache__/scraper.cpython-312.pyc b/__pycache__/scraper.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0995313c97fd066a5396421ff345a7eca7133e34
GIT binary patch
(binary data omitted)
diff --git a/agent.py b/agent.py
new file mode 100644
index 0000000..301b6c3
--- /dev/null
+++ b/agent.py
@@ -0,0 +1,68 @@
+from pydantic_ai import Agent
+from pydantic_ai.models.ollama import OllamaModel
+from pydantic_ai.providers.ollama import OllamaProvider
+from dotenv import load_dotenv
+import os
+import json
+from prompts import OPPORTUNITY_PROMPT, EVENT_PROMPT
+
+load_dotenv()
+
+ollama_url = os.getenv("OLLAMA_BASE_URL")
+
+prov = OllamaProvider(base_url=ollama_url)
+
+# Use qwen2.5:3b or phi4-mini for low-end hardware (RAM < 8GB)
+model = OllamaModel(
+    model_name='granite4.1:8b',
+    provider=prov
+)
+
+# --- OPPORTUNITY AGENT ---
+opportunity_agent = Agent(
+    model,
+    output_type=str,
+    system_prompt=OPPORTUNITY_PROMPT,
+    retries=5
+)
+
+# --- EVENT AGENT ---
+event_agent = Agent(
+    model,
+    output_type=str,
+    system_prompt=EVENT_PROMPT,
+    retries=5
+)
+
+async def parse_page(content: str, entry_type: str = "opportunity"):
+    """
+    Parse content and extract entry data based on type.
+
+    Args:
+        content: The raw text content to parse
+        entry_type: Either 'opportunity' or 'event'
+    """
+    # Select the appropriate agent
+    agent = opportunity_agent if entry_type == "opportunity" else event_agent
+
+    # 1. Run the agent (which returns a string)
+    print(f"Parsing {entry_type}...")
+    print(content)
+    result = await agent.run(content)
+    raw_text = result.output
+    print(raw_text)
+
+    # 2. Clean the string
+    # (remove the markdown code-fence decorators so json.loads doesn't crash)
+    clean_json = raw_text.replace("```json", "").replace("```", "").strip()
+
+    try:
+        # 3. Convert the string to a dictionary
+        data_dict = json.loads(clean_json)
+
+        # 4. Success! Return the dictionary to the caller (bot.py)
+        return data_dict
+
+    except json.JSONDecodeError as e:
+        print(f"Critical Error: The AI sent invalid JSON. Text was: {raw_text}")
+        raise e
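+
+# A hypothetical smoke test (not part of the bot flow): run this module
+# directly to check the Ollama connection and the prompt -> JSON round-trip.
+# Assumes Ollama is running and the model above has been pulled.
+#
+# if __name__ == "__main__":
+#     import asyncio
+#     sample = "Open call: Digital Horizons residency, Berlin. Deadline 15 Nov."
+#     print(asyncio.run(parse_page(sample, "opportunity")))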
diff --git a/bot.py b/bot.py
new file mode 100644
index 0000000..4799ddf
--- /dev/null
+++ b/bot.py
@@ -0,0 +1,279 @@
+import os
+import asyncio
+import logging
+from dotenv import load_dotenv
+from functools import wraps
+from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
+from telegram.ext import ApplicationBuilder, CommandHandler, MessageHandler, filters, ContextTypes, CallbackQueryHandler
+
+# Import your existing logic
+from agent import parse_page
+from database import upload_entry
+from scraper import get_clean_content
+
+load_dotenv()
+
+# Configuration
+TOKEN = os.getenv("TG_TOKEN")
+_allowed_env = os.getenv("ALLOWED_USERS", "")
+if _allowed_env:
+    try:
+        ALLOWED_IDS = [int(x.strip()) for x in _allowed_env.split(',') if x.strip()]
+    except Exception:
+        logging.warning("Failed to parse ALLOWED_USERS from .env; defaulting to empty list")
+        ALLOWED_IDS = []
+else:
+    ALLOWED_IDS = []
+
+if not TOKEN:
+    logging.warning("TG_TOKEN not set in .env; bot will not start without a token")
+
+# Setup Logging
+logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
+
+# --- Retry Function with Exponential Backoff ---
+def retry(max_attempts=3, backoff_factor=2, initial_delay=1):
+    """
+    Decorator for retrying async functions with exponential backoff.
+
+    Args:
+        max_attempts: Maximum number of retry attempts
+        backoff_factor: Multiplier for the delay between retries
+        initial_delay: Initial delay in seconds
+    """
+    def decorator(func):
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            delay = initial_delay
+            last_exception = None
+
+            for attempt in range(1, max_attempts + 1):
+                try:
+                    return await func(*args, **kwargs)
+                except Exception as e:
+                    last_exception = e
+                    if attempt < max_attempts:
+                        logging.warning(f"Attempt {attempt}/{max_attempts} failed for {func.__name__}: {str(e)}")
+                        await asyncio.sleep(delay)
+                        delay *= backoff_factor
+                    else:
+                        logging.error(f"All {max_attempts} attempts failed for {func.__name__}")
+
+            raise last_exception
+        return wrapper
+    return decorator
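+
+# Example (hypothetical, not wired up): apply the decorator to any flaky
+# async call, e.g. wrapping the scraper:
+#
+# @retry(max_attempts=3, backoff_factor=2)
+# async def fetch_page(url: str) -> str:
+#     return await get_clean_content(url)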
+
+# --- The Queue System ---
+# This ensures only ONE crawl/parse happens at a time, to save RAM
+task_queue = asyncio.Queue()
+
+
+def build_entry_summary(data, entry_type, saved=False):
+    if entry_type == "event":
+        event_datetime = data.get('date_time') or data.get('datetime')
+        return (
+            f"✅ **{data.get('title', 'Unknown')}**\n"
+            f"🦆 Org/s: {data.get('org')}\n"
+            f"📅 Date & Time: {event_datetime}\n"
+            f"📍 Location: {data.get('location')}\n"
+            f"🐊 Summary: {data.get('summary')}"
+            + ("\n\n💾 **Saved to PocketBase!**" if saved else "")
+        )
+
+    return (
+        f"✅ **{data.get('title', 'Unknown')}**\n"
+        f"🦆 Org/s: {data.get('org')}\n"
+        f"📋 Type: {data.get('type')}\n"
+        f"🦢 Deadline: {data.get('deadline')}\n"
+        f"☁️ Location: {data.get('location')}\n"
+        f"🐊 Summary: {data.get('summary')}"
+        + ("\n\n💾 **Saved to PocketBase!**" if saved else "")
+    )
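+
+# For an event, the summary above renders roughly as (hypothetical values,
+# borrowed from the example in prompts.py):
+#
+#   ✅ **Digital Arts Symposium 2026**
+#   🦆 Org/s: Digital Arts Society
+#   📅 Date & Time: 20-06-2026 14:00
+#   📍 Location: London, UK
+#   🐊 Summary: ...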
Usage: /ev ") + return + + source_kind = "url" if input_text.startswith("http") else "text" + + await update.message.reply_text("📥 Link queued for processing...") + await task_queue.put((update, context, input_text, "event", source_kind)) + +async def handle_followup_text(update: Update, context: ContextTypes.DEFAULT_TYPE): + if update.effective_user.id not in ALLOWED_IDS: + return + + if not context.user_data.get('awaiting_save_url'): + return + + text = update.message.text.strip() + if not text: + await update.message.reply_text("Please send a URL or type /skip to save without one.") + return + + if text.lower() == '/skip': + url = None + elif text.startswith('http'): + url = text + else: + await update.message.reply_text("Please send a valid URL or type /skip to save without one.") + return + + data = context.user_data.get('last_extracted') + entry_type = context.user_data.get('last_entry_type', 'opportunity') + if data: + upload_entry(data, entry_type, url) + context.user_data['awaiting_save_url'] = False + context.user_data['pending_save_url'] = None + await update.message.reply_text(build_entry_summary(data, entry_type, saved=True), parse_mode='Markdown') + +async def button_handler(update: Update, context: ContextTypes.DEFAULT_TYPE): + query = update.callback_query + await query.answer() + + if query.data == 'save_db': + data = context.user_data.get('last_extracted') + entry_type = context.user_data.get('last_entry_type', 'opportunity') + if data: + if context.user_data.get('last_source_kind') == 'text': + context.user_data['awaiting_save_url'] = True + context.user_data['pending_save_url'] = None + await query.edit_message_text( + build_entry_summary(data, entry_type) + + "\n\nSend a source URL to attach it, or type /skip to save without one.", + parse_mode='Markdown' + ) + else: + url = context.user_data.get('last_source_value') + upload_entry(data, entry_type, url) # Pass URL to save with entry + await query.edit_message_text(build_entry_summary(data, entry_type, saved=True), parse_mode='Markdown') + elif query.data == 'retry': + # Retry processing the last URL with the same entry type + source_value = context.user_data.get('last_source_value') + source_kind = context.user_data.get('last_source_kind', 'url') + entry_type = context.user_data.get('last_entry_type', 'opportunity') + if source_value: + await query.edit_message_text("⏳ Retrying...") + await task_queue.put((update, context, source_value, entry_type, source_kind)) + else: + await query.edit_message_text("❌ No source content to retry.") + else: + await query.edit_message_text("🗑️ Discarded.") + +# --- Main Entry --- +if __name__ == '__main__': + application = ApplicationBuilder().token(TOKEN).build() + + # Add Handlers + application.add_handler(CommandHandler("start", start)) + application.add_handler(CommandHandler("op", handle_opportunity)) + application.add_handler(CommandHandler("ev", handle_event)) + application.add_handler(MessageHandler(filters.TEXT & (~filters.COMMAND), handle_followup_text)) + application.add_handler(CallbackQueryHandler(button_handler)) + + # Start the worker thread + loop = asyncio.get_event_loop() + loop.create_task(worker()) + + print("🤖 Bot is running...") + application.run_polling() \ No newline at end of file diff --git a/database.py b/database.py new file mode 100644 index 0000000..58576f8 --- /dev/null +++ b/database.py @@ -0,0 +1,97 @@ +import os +from dotenv import load_dotenv +from pocketbase import PocketBase +from schemas import EntrySchema +from datetime import datetime + 
+
+def convert_datetime_to_pocketbase(date_time_str):
+    """
+    Convert a datetime string in DD-MM-YYYY HH:MM format to the PocketBase datetime format.
+    PocketBase (local) expects: YYYY-MM-DD HH:MM:SS
+    """
+    if date_time_str == 'N/A' or not date_time_str:
+        return None
+
+    try:
+        print(f"[DEBUG] Converting datetime: '{date_time_str}' (type: {type(date_time_str)})")
+
+        # Parse the input format: "DD-MM-YYYY HH:MM" or "DD-MM-YYYY (HH:MM)"
+        date_time_str = date_time_str.replace("(", "").replace(")", "").strip()
+
+        # Try with time first
+        if " " in date_time_str:
+            dt = datetime.strptime(date_time_str, "%d-%m-%Y %H:%M")
+        else:
+            # If only a date is provided, set the time to 00:00
+            dt = datetime.strptime(date_time_str, "%d-%m-%Y")
+
+        # Convert to the PocketBase local datetime format: YYYY-MM-DD HH:MM:SS
+        pb_format = dt.strftime("%Y-%m-%d %H:%M:%S")
+        print(f"[DEBUG] Converted to PocketBase format: '{pb_format}'")
+        return pb_format
+    except Exception as e:
+        print(f"[ERROR] Error converting datetime '{date_time_str}': {e}")
+        import traceback
+        traceback.print_exc()
+        return None
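+
+# Expected behavior (worked examples, derived from the formats above):
+#   convert_datetime_to_pocketbase("20-06-2026 14:00")   -> "2026-06-20 14:00:00"
+#   convert_datetime_to_pocketbase("20-06-2026 (14:00)") -> "2026-06-20 14:00:00"
+#   convert_datetime_to_pocketbase("15-11-2026")         -> "2026-11-15 00:00:00"
+#   convert_datetime_to_pocketbase("N/A")                -> None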
+
+def upload_entry(data, entry_type='opportunity', url=None):
+    """
+    Upload an entry to the appropriate PocketBase collection.
+
+    Args:
+        data: Dictionary containing the entry data
+        entry_type: 'opportunity' or 'event'
+        url: The source URL of the entry
+    """
+    print(f"[DEBUG] Uploading {entry_type} entry. Data: {data}")
+    data = dict(data)
+
+    # Add the URL to the data if provided
+    if url:
+        data['url'] = url
+        print(f"[DEBUG] Added URL: {url}")
+
+    try:
+        if entry_type == 'event':
+            # Map 'date_time' from the agent to 'datetime' for PocketBase
+            if 'date_time' in data:
+                original_dt = data['date_time']
+                # Convert and map to the PocketBase field name
+                data['datetime'] = convert_datetime_to_pocketbase(data['date_time'])
+                # Remove the original field since PocketBase expects 'datetime'
+                del data['date_time']
+                print(f"[DEBUG] Event datetime: '{original_dt}' -> '{data['datetime']}'")
+            else:
+                print("[WARNING] No 'date_time' field found in event data")
+
+            # Upload to the events collection
+            print(f"[DEBUG] Creating record in 'events' collection with data: {data}")
+            result = pb.collection('events').create(data)
+            print(f"[DEBUG] Successfully created record: {result}")
+            return result
+        else:
+            # Opportunities - convert the deadline to datetime format
+            if 'deadline' in data:
+                original_deadline = data['deadline']
+                # Convert the deadline to the PocketBase datetime format
+                data['deadline'] = convert_datetime_to_pocketbase(data['deadline'])
+                print(f"[DEBUG] Opportunity deadline: '{original_deadline}' -> '{data['deadline']}'")
+            else:
+                print("[WARNING] No 'deadline' field found in opportunity data")
+
+            # Upload to the opportunities collection
+            print(f"[DEBUG] Creating record in 'opportunities' collection with data: {data}")
+            result = pb.collection('opportunities').create(data)
+            print(f"[DEBUG] Successfully created record: {result}")
+            return result
+    except Exception as e:
+        print(f"[ERROR] Failed to upload entry to PocketBase: {e}")
+        import traceback
+        traceback.print_exc()
+        raise
\ No newline at end of file
diff --git a/prompts.py b/prompts.py
new file mode 100644
index 0000000..9d19bd8
--- /dev/null
+++ b/prompts.py
@@ -0,0 +1,55 @@
+# Central place for agent system prompts
+
+OPPORTUNITY_PROMPT = (
+    "You are a precise Data Extraction Specialist. Your goal is to convert "
+    "unstructured arts opportunity text into a strictly valid JSON object.\n\n"
+    "# TASK\n"
+    "Analyze the provided text and extract information into these JSON keys:\n"
+    "1. 'title': The title of the opportunity\n"
+    "2. 'org': The name of the organizing body/bodies\n"
+    "3. 'type': The category (e.g., Residency, Funding, Open Call, Workshop).\n"
+    "4. 'summary': A 3-sentence description of what the opportunity involves.\n"
+    "5. 'deadline': The deadline of the opportunity. Format: DD-MM-YYYY. Assume year 2026 if missing.\n"
+    "6. 'location': The physical city/country or 'Online'.\n\n"
+    "# CONSTRAINTS\n"
+    "- Return ONLY the JSON object inside markdown backticks (```json ... ```).\n"
+    "- Do NOT include any introductory or conversational text.\n"
+    "- If a field is missing, use 'N/A'.\n\n"
+    "# EXAMPLE OUTPUT\n"
+    "```json\n"
+    "{\n"
+    "  \"title\": \"Digital Horizons 2026\",\n"
+    "  \"org\": \"Digital Horizons\",\n"
+    "  \"type\": \"Residency\",\n"
+    "  \"summary\": \"A residency for digital artists to explore VR. Includes a stipend.\",\n"
+    "  \"deadline\": \"15-11-2026\",\n"
+    "  \"location\": \"Berlin, Germany\"\n"
+    "}\n"
+    "```"
+)
+
+EVENT_PROMPT = (
+    "You are a precise Data Extraction Specialist. Your goal is to convert "
+    "unstructured event text into a strictly valid JSON object.\n\n"
+    "# TASK\n"
+    "Analyze the provided text and extract information into these JSON keys:\n"
+    "1. 'title': The name/title of the event\n"
+    "2. 'org': The name of the organizing body/bodies\n"
+    "3. 'date_time': The date and time of the event. Format: DD-MM-YYYY (HH:MM) or 'N/A' if not specified.\n"
+    "4. 'summary': A 3-sentence description of what the event is about.\n"
+    "5. 'location': The physical venue/city/country or 'Online'.\n\n"
+    "# CONSTRAINTS\n"
+    "- Return ONLY the JSON object inside markdown backticks (```json ... ```).\n"
+    "- Do NOT include any introductory or conversational text.\n"
+    "- If a field is missing, use 'N/A'.\n\n"
+    "# EXAMPLE OUTPUT\n"
+    "```json\n"
+    "{\n"
+    "  \"title\": \"Digital Arts Symposium 2026\",\n"
+    "  \"org\": \"Digital Arts Society\",\n"
+    "  \"date_time\": \"20-06-2026 14:00\",\n"
+    "  \"summary\": \"Join us for a day of talks and workshops exploring digital art. Meet artists and curators. Includes lunch and networking.\",\n"
+    "  \"location\": \"London, UK\"\n"
+    "}\n"
+    "```"
+)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..93aeff6
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,189 @@
+ag-ui-protocol==0.1.18
+aiofile==3.9.0
+aiofiles==25.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.13.5
+aiosignal==1.4.0
+aiosqlite==0.22.1
+alphashape==1.3.1
+annotated-doc==0.0.4
+annotated-types==0.7.0
+anthropic==0.100.0
+anyio==4.13.0
+argcomplete==3.6.3
+attrs==26.1.0
+Authlib==1.7.2
+beartype==0.22.9
+beautifulsoup4==4.14.3
+boto3==1.43.6
+botocore==1.43.6
+brotli==1.2.0
+cachetools==7.1.1
+caio==0.9.25
+certifi==2026.4.22
+cffi==2.0.0
+chardet==7.4.3
+charset-normalizer==3.4.7
+click==8.3.3
+click-log==0.4.0
+cohere==5.21.1
+colorama==0.4.6
+Crawl4AI==0.8.6
+cryptography==48.0.0
+cssselect==1.4.0
+cyclopts==4.11.2
+distro==1.9.0
+dnspython==2.8.0
+docstring_parser==0.18.0
+docutils==0.22.4
+email-validator==2.3.0
+eval_type_backport==0.3.1
+exceptiongroup==1.3.1
+executing==2.2.1
+fake-useragent==2.2.0
+fastavro==1.12.2
+fastmcp==3.2.4
+fastuuid==0.14.0
+filelock==3.29.0
+frozenlist==1.8.0
+fsspec==2026.4.0
+genai-prices==0.0.59
+google-auth==2.52.0
+google-genai==2.0.0
+googleapis-common-protos==1.75.0
+greenlet==3.5.0
+griffelib==2.0.2
+groq==1.2.0
+grpcio==1.80.0
+h11==0.16.0
+h2==4.3.0
+hf-xet==1.5.0
+hpack==4.1.0
+httpcore==1.0.9
+httpx==0.28.1
+httpx-sse==0.4.3
+huggingface_hub==1.14.0
+humanize==4.15.0
+hyperframe==6.1.0
+idna==3.13
+importlib_metadata==8.7.1
+jaraco.classes==3.4.0
+jaraco.context==6.1.2
+jaraco.functools==4.4.0
+Jinja2==3.1.6
+jiter==0.14.0
+jmespath==1.1.0
+joblib==1.5.3
+joserfc==1.6.5
+jsonpath-python==1.1.6
+jsonref==1.1.0
+jsonschema==4.26.0
+jsonschema-path==0.4.6
+jsonschema-specifications==2025.9.1
+keyring==25.7.0
+lark==1.3.1
+logfire==4.32.1
+logfire-api==4.32.1
+lxml==5.4.0
+markdown-it-py==4.2.0
+markdownify==1.2.2
+MarkupSafe==3.0.3
+mcp==1.27.1
+mdurl==0.1.2
+mistralai==2.4.5
+more-itertools==11.0.2
+multidict==6.7.1
+networkx==3.6.1
+nexus-rpc==1.4.0
+nltk==3.9.4
+numpy==2.4.4
+openai==2.36.0
+openapi-pydantic==0.5.1
+opentelemetry-api==1.39.1
+opentelemetry-exporter-otlp-proto-common==1.39.1
+opentelemetry-exporter-otlp-proto-http==1.39.1
+opentelemetry-instrumentation==0.60b1
+opentelemetry-instrumentation-httpx==0.60b1
+opentelemetry-proto==1.39.1
+opentelemetry-sdk==1.39.1
+opentelemetry-semantic-conventions==0.60b1
+opentelemetry-util-http==0.60b1
+packaging==25.0
+patchright==1.59.1
+pathable==0.5.0
+pillow==12.2.0
+platformdirs==4.9.6
+playwright==1.59.0
+playwright-stealth==2.0.3
+pocketbase==0.17.1
+prompt_toolkit==3.0.52
+propcache==0.5.2
+protobuf==6.33.6
+psutil==7.2.2
+py-key-value-aio==0.4.4
+pyasn1==0.6.3
+pyasn1_modules==0.4.2
+pycparser==3.0
+pydantic==2.13.4
+pydantic-ai==1.92.0
+pydantic-ai-slim==1.92.0
+pydantic-evals==1.92.0
+pydantic-graph==1.92.0
+pydantic-handlebars==0.1.0
+pydantic-settings==2.14.1
+pydantic_core==2.46.4
+pyee==13.0.1
+Pygments==2.20.0
+PyJWT==2.12.1
+pyOpenSSL==26.2.0
+pyperclip==1.11.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.2.2
+python-multipart==0.0.27
+python-telegram-bot==22.7
+pywin32==311; sys_platform == "win32"
+pywin32-ctypes==0.2.3
+PyYAML==6.0.3
+rank-bm25==0.2.2
+referencing==0.37.0
+regex==2026.4.4
+requests==2.33.1
+rich==15.0.0
+rich-rst==1.3.2
+rpds-py==0.30.0
+rtree==1.4.1
+s3transfer==0.17.0
+scipy==1.17.1
+setuptools==82.0.1
+shapely==2.1.2
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+snowballstemmer==2.2.0
+soupsieve==2.8.3
+sse-starlette==3.4.2
+starlette==1.0.0
+temporalio==1.27.0
+tenacity==9.1.4
+tiktoken==0.12.0
+tokenizers==0.23.1
+tqdm==4.67.3
+trimesh==4.12.2
+typer==0.25.1
+types-protobuf==6.32.1.20260221
+types-requests==2.33.0.20260508
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+uncalled-for==0.3.2
+unclecode-litellm==1.81.13
+urllib3==2.7.0
+uvicorn==0.46.0
+watchfiles==1.1.1
+wcwidth==0.7.0
+websockets==16.0
+wheel==0.46.3
+wrapt==1.17.3
+xai-sdk==1.12.2
+xxhash==3.7.0
+yarl==1.23.0
+zipp==3.23.1
diff --git a/schemas.py b/schemas.py
new file mode 100644
index 0000000..568256b
--- /dev/null
+++ b/schemas.py
@@ -0,0 +1,21 @@
+from typing import Union, Literal
+from pydantic import BaseModel, Field
+
+## This file isn't used at runtime; it's just an outline of the schemas
+
+class BaseEntry(BaseModel):
+    title: str = Field(description="The name of the opportunity")
+    org: str = Field(description="The organisation")
+    summary: str = Field(description="A 3-sentence summary of what this is")
+
+class Event(BaseEntry):
+    type: Literal["event"] = "event"
+    date_time: str = Field(description="Date and time of the event")
+    location: str = Field(description="Location of the event")
+
+class Opportunity(BaseEntry):
+    type: str = Field(description="The type of opportunity (Open Call, Funding, Residency, etc.)")
+    deadline: str = Field(description="The deadline, in dd-mm-yyyy format")
+    location: str = Field(description="Location of entry")
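+
+# A hypothetical way to put these models to work: pass one as the agent's
+# output_type in agent.py, so pydantic-ai validates the LLM output directly
+# instead of hand-parsing JSON. Sketch (not wired up):
+#
+#   opportunity_agent = Agent(model, output_type=Opportunity,
+#                             system_prompt=OPPORTUNITY_PROMPT, retries=5)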
+
+EntrySchema = Union[Event, Opportunity]
\ No newline at end of file
diff --git a/scraper.py b/scraper.py
new file mode 100644
index 0000000..69a026e
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,29 @@
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CacheMode, CrawlerRunConfig, DefaultMarkdownGenerator
+
+async def get_clean_content(url: str):
+    md_generator = DefaultMarkdownGenerator(
+        options={
+            "ignore_links": True,
+            "ignore_images": True,
+            "body_width": 0,
+        }
+    )
+
+    browser_conf = BrowserConfig(
+        # cdp_url="http://127.0.0.1:9222",  # Use your existing Chrome session
+        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
+    )
+
+    run_conf = CrawlerRunConfig(
+        cache_mode=CacheMode.BYPASS,
+        word_count_threshold=10,
+        excluded_tags=["nav", "script", "style"],
+        markdown_generator=md_generator,
+        delay_before_return_html=3.0,
+        js_code="window.scrollTo(0, document.body.scrollHeight);",
+        magic=True
+    )
+
+    async with AsyncWebCrawler(config=browser_conf) as crawler:
+        result = await crawler.arun(url=url, config=run_conf)
+        return result.markdown
\ No newline at end of file