From 40b0be2cf0033a11c4e815952b131f6de17e3713 Mon Sep 17 00:00:00 2001
From: Cailean Finn
Date: Thu, 4 Apr 2024 18:24:11 +0100
Subject: [PATCH] updated app

---
 app.py                      | 99 ++++++++++++++++++++++++-------------
 templates/dev_base.html     |  9 ++++
 templates/homepage.html     | 29 +++++++++++
 templates/page_content.html | 14 ++++++
 4 files changed, 116 insertions(+), 35 deletions(-)
 create mode 100644 templates/dev_base.html
 create mode 100644 templates/homepage.html
 create mode 100644 templates/page_content.html

diff --git a/app.py b/app.py
index 83883b5..830c1ec 100644
--- a/app.py
+++ b/app.py
@@ -1,49 +1,78 @@
 from flask import Flask, render_template
 import requests
 from bs4 import BeautifulSoup
-import urllib.parse
 
-app = Flask(__name__)
-
-# Base URL of your MediaWiki instance
-MEDIAWIKI_BASE_URL = 'http://192.168.0.10/mediawiki/'
-
-# Define a route for the homepage
-@app.route('/')
-def homepage():
-    # Make a request to MediaWiki API to get a list of all pages
-    response = requests.get('http://192.168.0.10/mediawiki/api.php', params={'action': 'query', 'list': 'allpages', 'format': 'json'})
-    data = response.json()
-
-    # Extract page titles from API response
-    pages = [page['title'] for page in data['query']['allpages']]
+class WikiApp(Flask):
 
-    # Fetch content for each page
-    page_contents = {}
-    for page_title in pages:
-        page_response = requests.get('http://192.168.0.10/mediawiki/api.php', params={'action': 'parse', 'page': page_title, 'format': 'json'})
-        page_data = page_response.json()
-        page_html = page_data['parse']['text']['*']
+    MEDIAWIKI_BASE_URL = 'https://wiki.conceptnull.org/'
+    BASE_API = 'api.php?'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Define routes
+        self.route('/', methods=['GET'])(self.homepage)
+        self.route('/<title>', methods=['GET'])(self.page_content)
 
-        # Preprocess HTML content to fix image URLs
-        page_html = fix_image_urls(page_html)
+    def fetch_pages(self, category):
+        # Make a request to MediaWiki API using ask action to get all pages in the specified category
+        response = requests.get(self.MEDIAWIKI_BASE_URL + self.BASE_API, params={'action': 'ask', 'query': '[[Concept:'+category+']]', 'format': 'json', 'formatversion': '2'})
+        data = response.json()
 
-        # Parse HTML content to extract image URLs
-        soup = BeautifulSoup(page_html, 'html.parser')
-        images = soup.find_all('img')
-        image_urls = [urllib.parse.urljoin(MEDIAWIKI_BASE_URL, img['src']) for img in images]
+        # Extract page titles
+        page_titles = [page['fulltext'] for page in data['query']['results'].values()]
+        return page_titles
+
+    def homepage(self):
+        # Fetch pages for articles, projects, and newsletters
+        articles = self.fetch_pages('Articles')
+        projects = self.fetch_pages('Projects')
+        newsletters = self.fetch_pages('Newsletters')
 
-        # Store page content and image URLs
-        page_contents[page_title] = {'content': page_html, 'images': image_urls}
+        return render_template('homepage.html', articles=articles, projects=projects, newsletters=newsletters)
+
+    def page_content(self, title):
+        # Make a request to MediaWiki API to get content of a specific page
+        response = requests.get(self.MEDIAWIKI_BASE_URL + self.BASE_API, params={'action': 'parse', 'page': title, 'format': 'json'})
+        data = response.json()
+        # Extract page title and content
+        page_title = data['parse']['title']
+        page_content = data['parse']['text']['*']
+        page_content = self.fix_images(page_content)
+        return render_template('page_content.html', title=page_title, content=page_content)
 
-    # Render the base template with the list of pages and their content
-    return render_template('base.html', pages=page_contents)
+    def fix_images(self, page_content):
+        soup = BeautifulSoup(page_content, 'html.parser')
 
-def fix_image_urls(html_content):
-    # Replace relative image URLs with absolute URLs using MEDIAWIKI_BASE_URL
-    return html_content.replace('src="/mediawiki', 'src="' + MEDIAWIKI_BASE_URL)
+        # Find all img tags
+        images = soup.find_all('img')
 
-# Define other routes and functions as needed for your website
+        # Loop through each image and update the src attribute
+        for img in images:
+            # Append 'https://wiki.conceptnull.org' to the src attribute
+            img['src'] = 'https://wiki.conceptnull.org' + img['src']
+
+        # Find all a tags with href containing 'index.php'
+        links = soup.find_all('a', href=lambda href: href and 'index.php' in href)
+
+        # Loop through each link and modify its href attribute
+        for link in links:
+            # Remove 'index.php' from the href attribute
+            link['href'] = link['href'].replace('/index.php', '')
+
+        # Remove any element with class 'mw-editsection'
+        edit_sections = soup.find_all(class_='mw-editsection')
+
+        for edit_section in edit_sections:
+            edit_section.decompose()
+
+        # Remove any <img> tag's surrounding <a>
+        file_description_tags = soup.find_all('a', class_='mw-file-description')
+        for file_link in file_description_tags:
+            file_link.unwrap()
+
+        return soup.prettify()
 
 if __name__ == '__main__':
+    app = WikiApp(__name__)
     app.run(debug=True)
diff --git a/templates/dev_base.html b/templates/dev_base.html
new file mode 100644
index 0000000..8c0b19c
--- /dev/null
+++ b/templates/dev_base.html
@@ -0,0 +1,9 @@
+
+
+
+ My Wiki
+
+
+

Welcome to My Wiki

+ + \ No newline at end of file diff --git a/templates/homepage.html b/templates/homepage.html new file mode 100644 index 0000000..361a49e --- /dev/null +++ b/templates/homepage.html @@ -0,0 +1,29 @@ + + + + + + Homepage + + +

Homepage

+

Articles

+
+

Projects

+ +

Newsletters

+ + + diff --git a/templates/page_content.html b/templates/page_content.html new file mode 100644 index 0000000..2b70006 --- /dev/null +++ b/templates/page_content.html @@ -0,0 +1,14 @@ + + + + + + {{ title }} + + +

{{ title }}

+
+ {{ content | safe }} +
+ +