@@ -1,49 +1,78 @@
 from flask import Flask, render_template
 import requests
 from bs4 import BeautifulSoup
-import urllib.parse
-
-app = Flask(__name__)
-
-# Base URL of your MediaWiki instance
-MEDIAWIKI_BASE_URL = 'http://192.168.0.10/mediawiki/'
-
-# Define a route for the homepage
-@app.route('/')
-def homepage():
-    # Make a request to MediaWiki API to get a list of all pages
-    response = requests.get('http://192.168.0.10/mediawiki/api.php', params={'action': 'query', 'list': 'allpages', 'format': 'json'})
-    data = response.json()
-
-    # Extract page titles from API response
-    pages = [page['title'] for page in data['query']['allpages']]
-
-    # Fetch content for each page
-    page_contents = {}
-    for page_title in pages:
-        page_response = requests.get('http://192.168.0.10/mediawiki/api.php', params={'action': 'parse', 'page': page_title, 'format': 'json'})
-        page_data = page_response.json()
-        page_html = page_data['parse']['text']['*']
-
-        # Preprocess HTML content to fix image URLs
-        page_html = fix_image_urls(page_html)
-
-        # Parse HTML content to extract image URLs
-        soup = BeautifulSoup(page_html, 'html.parser')
-        images = soup.find_all('img')
-        image_urls = [urllib.parse.urljoin(MEDIAWIKI_BASE_URL, img['src']) for img in images]
-
-        # Store page content and image URLs
-        page_contents[page_title] = {'content': page_html, 'images': image_urls}
-
-    # Render the base template with the list of pages and their content
-    return render_template('base.html', pages=page_contents)
-
-def fix_image_urls(html_content):
-    # Replace relative image URLs with absolute URLs using MEDIAWIKI_BASE_URL
-    return html_content.replace('src="/mediawiki', 'src="' + MEDIAWIKI_BASE_URL)
-
-# Define other routes and functions as needed for your website
-
+
+class WikiApp(Flask):
+    MEDIAWIKI_BASE_URL = 'https://wiki.conceptnull.org/'
+    BASE_API = 'api.php?'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Define routes
+        self.route('/', methods=['GET'])(self.homepage)
+        self.route('/<string:title>', methods=['GET'])(self.page_content)
+
+    def fetch_pages(self, category):
+        # Make a request to the MediaWiki API using the ask action to get all pages in the specified category
+        response = requests.get(self.MEDIAWIKI_BASE_URL + self.BASE_API, params={'action': 'ask', 'query': '[[Concept:' + category + ']]', 'format': 'json', 'formatversion': '2'})
+        data = response.json()
+
+        # Extract page titles
+        page_titles = [page['fulltext'] for page in data['query']['results'].values()]
+        return page_titles
+
+    def homepage(self):
+        # Fetch pages for articles, projects, and newsletters
+        articles = self.fetch_pages('Articles')
+        projects = self.fetch_pages('Projects')
+        newsletters = self.fetch_pages('Newsletters')
+
+        return render_template('homepage.html', articles=articles, projects=projects, newsletters=newsletters)
+
+    def page_content(self, title):
+        # Make a request to the MediaWiki API to get the content of a specific page
+        response = requests.get(self.MEDIAWIKI_BASE_URL + self.BASE_API, params={'action': 'parse', 'page': title, 'format': 'json'})
+        data = response.json()
+        # Extract page title and content
+        page_title = data['parse']['title']
+        page_content = data['parse']['text']['*']
+        page_content = self.fix_images(page_content)
+        return render_template('page_content.html', title=page_title, content=page_content)
+
+    def fix_images(self, page_content):
+        soup = BeautifulSoup(page_content, 'html.parser')
+
+        # Find all img tags
+        images = soup.find_all('img')
+
+        # Loop through each image and update the src attribute
+        for img in images:
+            # Prepend 'https://wiki.conceptnull.org' to the src attribute
+            img['src'] = 'https://wiki.conceptnull.org' + img['src']
+
+        # Find all a tags with href containing 'index.php'
+        links = soup.find_all('a', href=lambda href: href and 'index.php' in href)
+
+        # Loop through each link and modify its href attribute
+        for link in links:
+            # Remove 'index.php' from the href attribute
+            link['href'] = link['href'].replace('/index.php', '')
+
+        # Remove any element with class 'mw-editsection'
+        edit_sections = soup.find_all(class_='mw-editsection')
+
+        for edit_section in edit_sections:
+            edit_section.decompose()
+
+        # Unwrap <a> tags (class 'mw-file-description') that surround file thumbnails
+        file_description_tags = soup.find_all('a', class_='mw-file-description')
+        for file_link in file_description_tags:
+            file_link.unwrap()
+
+        return soup.prettify()
+
+
 if __name__ == '__main__':
+    app = WikiApp(__name__)
     app.run(debug=True)
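
Note: fetch_pages() assumes the Semantic MediaWiki ask endpoint replies with a query.results mapping whose entries each carry a fulltext title. The snippet below is only a sketch of that assumption, using a made-up payload rather than a real response from wiki.conceptnull.org, so the extraction logic can be checked without network access:

# Hypothetical 'ask' response shaped the way WikiApp.fetch_pages() expects it.
sample_response = {
    'query': {
        'results': {
            'Example Article': {'fulltext': 'Example Article'},
            'Another Article': {'fulltext': 'Another Article'},
        }
    }
}

# Same extraction expression used in fetch_pages()
page_titles = [page['fulltext'] for page in sample_response['query']['results'].values()]
print(page_titles)  # ['Example Article', 'Another Article']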