Cailean Finn
10 months ago
4 changed files with 116 additions and 35 deletions
@ -1,49 +1,78 @@ |
|||
from flask import Flask, render_template |
|||
import requests |
|||
from bs4 import BeautifulSoup |
|||
import urllib.parse |
|||
|
|||
app = Flask(__name__) |
|||
class WikiApp(Flask):
    """Flask application that republishes pages from a MediaWiki instance.

    ``/`` lists the pages returned for the ``Articles``, ``Projects`` and
    ``Newsletters`` concepts. ``/<title>`` fetches the parsed HTML of the wiki
    page with that title, cleans it up and renders it.
    """

    # Base URL of the MediaWiki instance; API and image URLs are built from it.
    MEDIAWIKI_BASE_URL = 'https://wiki.conceptnull.org/'
    # API script name appended to MEDIAWIKI_BASE_URL for every API request.
    BASE_API = 'api.php?'
    # Seconds to wait for the wiki before giving up, so a stalled wiki cannot
    # block a request forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, *args, **kwargs):
        """Create the Flask app and register the two GET routes.

        All arguments are forwarded unchanged to ``Flask.__init__``.
        """
        super().__init__(*args, **kwargs)

        # Routes are bound here (not via decorators) because the view
        # functions are bound methods of this instance.
        self.route('/', methods=['GET'])(self.homepage)
        self.route('/<string:title>', methods=['GET'])(self.page_content)

    def fetch_pages(self, category):
        """Return the titles of all pages matching ``[[Concept:<category>]]``.

        Args:
            category: Concept name inserted into the ask query.

        Returns:
            A list with the ``fulltext`` value of every result; an empty list
            when the query matched nothing.

        Raises:
            KeyError: If the API response has no ``query``/``results`` entry.
            requests.exceptions.RequestException: On network failure or
                timeout.
        """
        response = requests.get(
            self.MEDIAWIKI_BASE_URL + self.BASE_API,
            params={
                'action': 'ask',
                'query': '[[Concept:{}]]'.format(category),
                'format': 'json',
                'formatversion': '2',
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        data = response.json()

        results = data['query']['results']
        # NOTE(review): an empty result set may arrive as an empty list rather
        # than an empty mapping (confirm against the ask API); a list has no
        # .values(), so return early instead of crashing the homepage.
        if not results:
            return []
        return [page['fulltext'] for page in results.values()]

    def homepage(self):
        """Render ``homepage.html`` with the article, project and newsletter titles."""
        articles = self.fetch_pages('Articles')
        projects = self.fetch_pages('Projects')
        newsletters = self.fetch_pages('Newsletters')

        return render_template(
            'homepage.html',
            articles=articles,
            projects=projects,
            newsletters=newsletters,
        )

    def page_content(self, title):
        """Render the wiki page called ``title`` through ``page_content.html``.

        Args:
            title: Page title taken from the URL path.

        Returns:
            The rendered template containing the page title and cleaned HTML.

        Raises:
            werkzeug.exceptions.NotFound: If the API response carries no
                ``parse`` result for ``title`` (answered as HTTP 404).
            requests.exceptions.RequestException: On network failure or
                timeout.
        """
        # Imported locally so the module's top-level import line is untouched.
        from flask import abort

        response = requests.get(
            self.MEDIAWIKI_BASE_URL + self.BASE_API,
            params={'action': 'parse', 'page': title, 'format': 'json'},
            timeout=self.REQUEST_TIMEOUT,
        )
        data = response.json()

        # Any path segment reaches this view (including browser requests such
        # as /favicon.ico), so a response without a 'parse' entry must become
        # a 404 instead of a KeyError / HTTP 500.
        if 'parse' not in data:
            abort(404)

        page_title = data['parse']['title']
        html = self.fix_images(data['parse']['text']['*'])
        return render_template('page_content.html', title=page_title, content=html)

    def fix_images(self, page_content):
        """Rewrite wiki HTML so that it works when served from this app.

        Performs four clean-ups on the parsed markup:

        * image ``src`` values are resolved against ``MEDIAWIKI_BASE_URL``;
        * ``/index.php`` is stripped from link targets that contain it;
        * elements with class ``mw-editsection`` are removed;
        * ``<a class="mw-file-description">`` wrappers are unwrapped, keeping
          their children.

        Args:
            page_content: HTML fragment as a string.

        Returns:
            The modified HTML as a string.
        """
        soup = BeautifulSoup(page_content, 'html.parser')

        # urljoin leaves absolute URLs alone and resolves relative ones against
        # the wiki; plain string prefixing would corrupt the former. Images
        # without a src attribute are skipped instead of raising KeyError.
        # NOTE(review): srcset attributes are not rewritten here.
        for img in soup.find_all('img', src=True):
            img['src'] = urllib.parse.urljoin(self.MEDIAWIKI_BASE_URL, img['src'])

        # Drop the script name from internal links.
        for link in soup.find_all('a', href=lambda href: href and 'index.php' in href):
            link['href'] = link['href'].replace('/index.php', '')

        # Remove the per-section edit controls.
        for edit_section in soup.find_all(class_='mw-editsection'):
            edit_section.decompose()

        # Keep the image but drop the surrounding file-description link.
        for file_link in soup.find_all('a', class_='mw-file-description'):
            file_link.unwrap()

        # str() serialises the tree as-is; prettify() inserts newlines and
        # indentation, which changes how inline content renders.
        return str(soup)
|||
|
|||
if __name__ == '__main__':
    # Script entry point: build the application and start Flask's built-in
    # server with debug mode switched on.
    app = WikiApp(import_name=__name__)
    app.run(debug=True)
|||
|
@ -0,0 +1,9 @@ |
|||
<!DOCTYPE html>
<!-- Static welcome page. Declares document language, character encoding and
     viewport so browsers do not have to guess them. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>My Wiki</title>
</head>
<body>
    <h1>Welcome to My Wiki</h1>
</body>
</html>
@ -0,0 +1,29 @@ |
|||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Homepage</title>
</head>
<body>
    <h1>Homepage</h1>
    {# One heading and one link list per section; every title links to the
       page_content endpoint. #}
    {% for heading, titles in [('Articles', articles), ('Projects', projects), ('Newsletters', newsletters)] %}
    <h2>{{ heading }}</h2>
    <ul>
        {% for page_title in titles %}
        <li><a href="{{ url_for('page_content', title=page_title) }}">{{ page_title }}</a></li>
        {% endfor %}
    </ul>
    {% endfor %}
</body>
</html>
@ -0,0 +1,14 @@ |
|||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{{ title }}</title>
</head>
<body>
    <h1>{{ title }}</h1>
    {# `content` is passed through the `safe` filter, so it is emitted without
       HTML escaping. #}
    <div>
        {{ content|safe }}
    </div>
</body>
</html>
Loading…
Reference in new issue