Cailean Finn
10 months ago
4 changed files with 116 additions and 35 deletions
@ -1,49 +1,78 @@ |
|||||
from flask import Flask, render_template |
from flask import Flask, render_template |
||||
import requests |
import requests |
||||
from bs4 import BeautifulSoup |
from bs4 import BeautifulSoup |
||||
import urllib.parse |
|
||||
|
|
||||
app = Flask(__name__) |
class WikiApp(Flask):
    """Flask application that serves content fetched from a MediaWiki instance.

    Routes are registered in ``__init__``; page lists come from the Semantic
    MediaWiki ``ask`` API and individual pages from the ``parse`` API.
    """

    # Base URL of the MediaWiki instance and the relative path of its API
    # endpoint. All request URLs and rewritten image URLs derive from these.
    MEDIAWIKI_BASE_URL = 'https://wiki.conceptnull.org/'
    BASE_API = 'api.php?'

    # Seconds to wait for the wiki before giving up; without a timeout a
    # stalled server would hang the request forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Define routes. ``self.route(...)(handler)`` registers each bound
        # method; the endpoint name defaults to the method name, which is
        # what the templates' url_for('page_content', ...) relies on.
        self.route('/', methods=['GET'])(self.homepage)
        self.route('/<string:title>', methods=['GET'])(self.page_content)

    def fetch_pages(self, category):
        """Return the titles of all wiki pages in the given concept.

        :param category: Semantic MediaWiki concept name, e.g. ``'Articles'``.
        :returns: list of page-title strings.
        :raises requests.HTTPError: if the wiki answers with an error status.
        """
        # 'ask' is a Semantic MediaWiki query; [[Concept:X]] selects every
        # page belonging to concept X.
        response = requests.get(
            self.MEDIAWIKI_BASE_URL + self.BASE_API,
            params={
                'action': 'ask',
                'query': '[[Concept:' + category + ']]',
                'format': 'json',
                'formatversion': '2',
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail loudly on HTTP errors instead of crashing later on a
        # KeyError while digging through an error payload.
        response.raise_for_status()
        data = response.json()

        # Extract page titles from the 'ask' result set.
        return [page['fulltext'] for page in data['query']['results'].values()]

    def homepage(self):
        """Render the homepage listing articles, projects and newsletters."""
        articles = self.fetch_pages('Articles')
        projects = self.fetch_pages('Projects')
        newsletters = self.fetch_pages('Newsletters')
        return render_template(
            'homepage.html',
            articles=articles,
            projects=projects,
            newsletters=newsletters,
        )

    def page_content(self, title):
        """Render a single wiki page identified by *title*.

        :param title: wiki page title taken from the URL path.
        :raises requests.HTTPError: if the wiki answers with an error status.
        """
        # The 'parse' action returns the page rendered to HTML.
        response = requests.get(
            self.MEDIAWIKI_BASE_URL + self.BASE_API,
            params={'action': 'parse', 'page': title, 'format': 'json'},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        data = response.json()

        # Extract the canonical title and the rendered HTML body.
        page_title = data['parse']['title']
        page_html = data['parse']['text']['*']
        page_html = self.fix_images(page_html)
        return render_template('page_content.html', title=page_title, content=page_html)

    def fix_images(self, page_content):
        """Clean up MediaWiki HTML for embedding in this site's templates.

        Rewrites relative image URLs to absolute ones on the wiki host,
        strips '/index.php' from internal links so they resolve against this
        app's routes, removes '[edit]' section links, and unwraps file-page
        anchors around images.

        :param page_content: HTML string as returned by the 'parse' API.
        :returns: prettified, cleaned HTML string.
        """
        soup = BeautifulSoup(page_content, 'html.parser')

        # Reuse the configured base URL (was previously hard-coded here,
        # duplicating MEDIAWIKI_BASE_URL). rstrip('/') because the wiki
        # emits src values that already start with '/'.
        # NOTE(review): assumes every img src is host-relative — an already
        # absolute src would be corrupted; confirm against wiki output.
        image_base = self.MEDIAWIKI_BASE_URL.rstrip('/')
        for img in soup.find_all('img'):
            img['src'] = image_base + img['src']

        # Internal wiki links look like '/index.php/Title'; dropping the
        # '/index.php' prefix turns them into this app's '/<title>' routes.
        links = soup.find_all('a', href=lambda href: href and 'index.php' in href)
        for link in links:
            link['href'] = link['href'].replace('/index.php', '')

        # Remove the '[edit]' links MediaWiki adds next to every heading.
        for edit_section in soup.find_all(class_='mw-editsection'):
            edit_section.decompose()

        # Unwrap the <a> tags MediaWiki puts around images (they link to the
        # file-description page, which this app does not serve).
        for file_link in soup.find_all('a', class_='mw-file-description'):
            file_link.unwrap()

        return soup.prettify()
||||
|
|
||||
if __name__ == '__main__':
    # Instantiate the wiki front-end and start Flask's built-in development
    # server (debug mode enabled — not suitable for production).
    application = WikiApp(__name__)
    application.run(debug=True)
||||
|
@ -0,0 +1,9 @@ |
|||||
|
<!DOCTYPE html> |
||||
|
<html> |
||||
|
<head> |
||||
|
<title>My Wiki</title> |
||||
|
</head> |
||||
|
<body> |
||||
|
<h1>Welcome to My Wiki</h1> |
||||
|
</body> |
||||
|
</html> |
@ -0,0 +1,29 @@ |
|||||
|
<!DOCTYPE html> |
||||
|
<html lang="en"> |
||||
|
<head> |
||||
|
<meta charset="UTF-8"> |
||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
||||
|
<title>Homepage</title> |
||||
|
</head> |
||||
|
<body> |
||||
|
<h1>Homepage</h1> |
||||
|
<h2>Articles</h2> |
||||
|
<ul> |
||||
|
{% for article_title in articles %} |
||||
|
<li><a href="{{ url_for('page_content', title=article_title) }}">{{ article_title }}</a></li> |
||||
|
{% endfor %} |
||||
|
</ul> |
||||
|
<h2>Projects</h2> |
||||
|
<ul> |
||||
|
{% for project_title in projects %} |
||||
|
<li><a href="{{ url_for('page_content', title=project_title) }}">{{ project_title }}</a></li> |
||||
|
{% endfor %} |
||||
|
</ul> |
||||
|
<h2>Newsletters</h2> |
||||
|
<ul> |
||||
|
{% for newsletter_title in newsletters %} |
||||
|
<li><a href="{{ url_for('page_content', title=newsletter_title) }}">{{ newsletter_title }}</a></li> |
||||
|
{% endfor %} |
||||
|
</ul> |
||||
|
</body> |
||||
|
</html> |
@ -0,0 +1,14 @@ |
|||||
|
<!DOCTYPE html> |
||||
|
<html lang="en"> |
||||
|
<head> |
||||
|
<meta charset="UTF-8"> |
||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
||||
|
<title>{{ title }}</title> |
||||
|
</head> |
||||
|
<body> |
||||
|
<h1>{{ title }}</h1> |
||||
|
<div> |
||||
|
{{ content | safe }} |
||||
|
</div> |
||||
|
</body> |
||||
|
</html> |
Loading…
Reference in new issue