@@ -1,49 +1,78 @@
 from flask import Flask, render_template
 import requests
 from bs4 import BeautifulSoup
-import urllib.parse
-
-app = Flask(__name__)
-
-# Base URL of your MediaWiki instance
-MEDIAWIKI_BASE_URL = 'http://192.168.0.10/mediawiki/'
-
-# Define a route for the homepage
-@app.route('/')
-def homepage():
-    # Make a request to MediaWiki API to get a list of all pages
-    response = requests.get('http://192.168.0.10/mediawiki/api.php', params={'action': 'query', 'list': 'allpages', 'format': 'json'})
-    data = response.json()
-
-    # Extract page titles from API response
-    pages = [page['title'] for page in data['query']['allpages']]
-
-    # Fetch content for each page
-    page_contents = {}
-    for page_title in pages:
-        page_response = requests.get('http://192.168.0.10/mediawiki/api.php', params={'action': 'parse', 'page': page_title, 'format': 'json'})
-        page_data = page_response.json()
-        page_html = page_data['parse']['text']['*']
-
-        # Preprocess HTML content to fix image URLs
-        page_html = fix_image_urls(page_html)
-
-        # Parse HTML content to extract image URLs
-        soup = BeautifulSoup(page_html, 'html.parser')
-        images = soup.find_all('img')
-        image_urls = [urllib.parse.urljoin(MEDIAWIKI_BASE_URL, img['src']) for img in images]
-
-        # Store page content and image URLs
-        page_contents[page_title] = {'content': page_html, 'images': image_urls}
-
-    # Render the base template with the list of pages and their content
-    return render_template('base.html', pages=page_contents)
-
-def fix_image_urls(html_content):
-    # Replace relative image URLs with absolute URLs using MEDIAWIKI_BASE_URL
-    return html_content.replace('src="/mediawiki', 'src="' + MEDIAWIKI_BASE_URL)
-
-# Define other routes and functions as needed for your website
-
+
+class WikiApp(Flask):
+    MEDIAWIKI_BASE_URL = 'https://wiki.conceptnull.org/'
+    BASE_API = 'api.php?'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Define routes
+        self.route('/', methods=['GET'])(self.homepage)
+        self.route('/<string:title>', methods=['GET'])(self.page_content)
+
+    def fetch_pages(self, category):
+        # Make a request to the MediaWiki API using the ask action to get all pages in the specified category
+        response = requests.get(self.MEDIAWIKI_BASE_URL + self.BASE_API, params={'action': 'ask', 'query': '[[Concept:' + category + ']]', 'format': 'json', 'formatversion': '2'})
+        data = response.json()
+
+        # Extract page titles
+        page_titles = [page['fulltext'] for page in data['query']['results'].values()]
+        return page_titles
+
+    def homepage(self):
+        # Fetch pages for articles, projects, and newsletters
+        articles = self.fetch_pages('Articles')
+        projects = self.fetch_pages('Projects')
+        newsletters = self.fetch_pages('Newsletters')
+
+        return render_template('homepage.html', articles=articles, projects=projects, newsletters=newsletters)
+
+    def page_content(self, title):
+        # Make a request to the MediaWiki API to get the content of a specific page
+        response = requests.get(self.MEDIAWIKI_BASE_URL + self.BASE_API, params={'action': 'parse', 'page': title, 'format': 'json'})
+        data = response.json()
+        # Extract page title and content
+        page_title = data['parse']['title']
+        page_content = data['parse']['text']['*']
+        page_content = self.fix_images(page_content)
+        return render_template('page_content.html', title=page_title, content=page_content)
+
+    def fix_images(self, page_content):
+        soup = BeautifulSoup(page_content, 'html.parser')
+
+        # Find all img tags
+        images = soup.find_all('img')
+
+        # Loop through each image and update the src attribute
+        for img in images:
+            # Prepend 'https://wiki.conceptnull.org' to the src attribute
+            img['src'] = 'https://wiki.conceptnull.org' + img['src']
+
+        # Find all a tags with href containing 'index.php'
+        links = soup.find_all('a', href=lambda href: href and 'index.php' in href)
+
+        # Loop through each link and modify its href attribute
+        for link in links:
+            # Remove 'index.php' from the href attribute
+            link['href'] = link['href'].replace('/index.php', '')
+
+        # Remove any element with class 'mw-editsection'
+        edit_sections = soup.find_all(class_='mw-editsection')
+
+        for edit_section in edit_sections:
+            edit_section.decompose()
+
+        # Unwrap <a> tags (class 'mw-file-description') that surround file thumbnails
+        file_description_tags = soup.find_all('a', class_='mw-file-description')
+        for file_link in file_description_tags:
+            file_link.unwrap()
+
+        return soup.prettify()
+
+
 if __name__ == '__main__':
+    app = WikiApp(__name__)
     app.run(debug=True)
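
Note: fetch_pages() assumes the Semantic MediaWiki ask endpoint replies with a query.results mapping whose entries each carry a fulltext title. The snippet below is only a sketch of that assumption, using a made-up payload rather than a real response from wiki.conceptnull.org, so the extraction logic can be checked without network access:

# Hypothetical 'ask' response shaped the way WikiApp.fetch_pages() expects it.
sample_response = {
    'query': {
        'results': {
            'Example Article': {'fulltext': 'Example Article'},
            'Another Article': {'fulltext': 'Another Article'},
        }
    }
}

# Same extraction expression used in fetch_pages()
page_titles = [page['fulltext'] for page in sample_response['query']['results'].values()]
print(page_titles)  # ['Example Article', 'Another Article']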