"""Flask front-end that renders pages pulled from a MediaWiki instance.

The home page lists the pages belonging to a few Semantic MediaWiki concepts,
and every other URL renders the parsed HTML of the wiki page with that title.
"""

import urllib.parse

import requests
from bs4 import BeautifulSoup
from flask import Flask, abort, render_template


class WikiApp(Flask):
    """Flask application that serves content fetched from the MediaWiki API."""

    # Root of the MediaWiki site; also used to absolutise relative image URLs.
    MEDIAWIKI_BASE_URL = 'https://wiki.conceptnull.org/'
    # API entry point, appended to MEDIAWIKI_BASE_URL for every API call.
    BASE_API = 'api.php?'
    # Seconds to wait for the wiki before giving up; without a timeout a hung
    # wiki would block the request worker indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, *args, **kwargs):
        """Create the application and register its two GET routes."""
        super().__init__(*args, **kwargs)

        # Define routes
        self.route('/', methods=['GET'])(self.homepage)
        self.route('/<string:title>', methods=['GET'])(self.page_content)

    def _api_get(self, params):
        """Call the MediaWiki API with *params* and return the decoded JSON.

        Raises:
            requests.RequestException: on connection failure, timeout, or a
                non-2xx HTTP status.
        """
        response = requests.get(
            self.MEDIAWIKI_BASE_URL + self.BASE_API,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def fetch_pages(self, category):
        """Return the titles of all pages in the concept named *category*.

        Uses the ``ask`` API action with the query ``[[Concept:<category>]]``.
        An empty list is returned when the response carries no results.
        """
        data = self._api_get({
            'action': 'ask',
            'query': f'[[Concept:{category}]]',
            'format': 'json',
            'formatversion': '2',
        })

        # NOTE(review): an empty result set appears to come back as an empty
        # list rather than a mapping -- confirm against the wiki. Either way,
        # a falsy or missing value is treated as "no pages".
        results = data.get('query', {}).get('results') or {}
        if not results:
            self.logger.warning('ask query for %r returned no results', category)

        # Extract page titles
        return [page['fulltext'] for page in results.values()]

    def homepage(self):
        """Render ``homepage.html`` with the article, project and newsletter lists."""
        articles = self.fetch_pages('Articles')
        projects = self.fetch_pages('Projects')
        newsletters = self.fetch_pages('Newsletters')

        return render_template(
            'homepage.html',
            articles=articles,
            projects=projects,
            newsletters=newsletters,
        )

    def page_content(self, title):
        """Render ``page_content.html`` for the wiki page called *title*.

        Responds with HTTP 404 when the API response has no ``parse`` entry,
        which is what happens when the requested page does not exist.
        """
        data = self._api_get({'action': 'parse', 'page': title, 'format': 'json'})

        parsed = data.get('parse')
        if parsed is None:
            abort(404)

        # Extract page title and content
        page_title = parsed['title']
        page_html = self.fix_images(parsed['text']['*'])

        return render_template('page_content.html', title=page_title, content=page_html)

    def fix_images(self, page_content):
        """Rewrite wiki HTML so it can be embedded in this site's templates.

        * image ``src`` values are resolved against ``MEDIAWIKI_BASE_URL``;
        * ``/index.php`` is stripped from link targets;
        * ``mw-editsection`` elements are removed;
        * ``mw-file-description`` anchors are unwrapped, keeping their children.

        Returns the modified document as produced by ``soup.prettify()``.
        """
        soup = BeautifulSoup(page_content, 'html.parser')

        # urljoin leaves absolute URLs untouched and resolves relative and
        # protocol-relative ones, unlike blindly prefixing the host. Images
        # without a src attribute are skipped instead of raising KeyError.
        for img in soup.find_all('img', src=True):
            img['src'] = urllib.parse.urljoin(self.MEDIAWIKI_BASE_URL, img['src'])

        # Find all a tags with href containing 'index.php' and drop that segment
        for link in soup.find_all('a', href=lambda href: href and 'index.php' in href):
            link['href'] = link['href'].replace('/index.php', '')

        # Remove any element with class 'mw-editsection'
        for edit_section in soup.find_all(class_='mw-editsection'):
            edit_section.decompose()

        # Drop the <a> wrapper around embedded files but keep what it contains
        for file_link in soup.find_all('a', class_='mw-file-description'):
            file_link.unwrap()

        return soup.prettify()


# Module-level application object so WSGI servers can import it.
app = WikiApp(__name__)

if __name__ == '__main__':
    # NOTE: debug=True enables the interactive debugger; development use only.
    app.run(debug=True)