gemini/blog/index.gen.py

64 lines
2.1 KiB
Python
Executable File

#!/usr/bin/env python
'''Fetch (almost) all blog posts from the Dotclear database and export them in Markdown format.

TODO: use the RSS/Atom feed instead
TODO: convert to proper Gemtext'''
import os
import re
from urllib.parse import quote
import html2markdown
import mysql.connector
# Patterns applied to the Dotclear ``post_url`` column.
# "digits/digits/digits" prefix of a post URL; used as the output directory.
path_regex = re.compile(r'\d+/\d+/\d+')
# Dash-separated counterpart of path_regex (not referenced in the visible
# part of this script; kept for compatibility).
date_regex = re.compile(r'\d+-\d+-\d+')
# Captures everything that follows the "digits/digits/digits/" prefix.
filename_regex = re.compile(r'\d+/\d+/\d+/(.*)')

# Patterns applied to the text of Dotclear's config.php. Each one captures
# the quoted value that follows the quoted constant name. Whitespace around
# the comma is optional so that both 'KEY','value' and 'KEY', 'value' match.
db_password_regex = re.compile(r"'DC_DBPASSWORD'\s*,\s*'(.*)'")
db_host_regex = re.compile(r"'DC_DBHOST'\s*,\s*'(.*)'")
db_name_regex = re.compile(r"'DC_DBNAME'\s*,\s*'(.*)'")
db_user_regex = re.compile(r"'DC_DBUSER'\s*,\s*'(.*)'")
# Read the Dotclear configuration file; the database credentials are
# extracted from its text with the db_*_regex patterns.
with open("/home/www/dotclear/inc/config.php", "r", encoding="utf-8") as f:
    c = f.read()

# findall(...)[0] raises IndexError when a setting is absent from the config,
# which happens while the arguments are evaluated, i.e. before connecting.
mydb = mysql.connector.connect(
    host=db_host_regex.findall(c)[0],
    user=db_user_regex.findall(c)[0],
    password=db_password_regex.findall(c)[0],
    database=db_name_regex.findall(c)[0],
)
# Close the cursor and the connection even when the query fails, so that an
# error in execute()/fetchall() does not leave the connection open.
try:
    mycursor = mydb.cursor()
    try:
        # Rows are (post_url, post_title, post_content_xhtml), highest
        # post_id first, restricted to URLs containing at least three slashes.
        mycursor.execute(
            "SELECT post_url, post_title, post_content_xhtml "
            "FROM dc_post WHERE post_url LIKE '%/%/%/%' ORDER BY post_id DESC"
        )
        myresult = mycursor.fetchall()
    finally:
        mycursor.close()
finally:
    mydb.close()
# Generate the capsule: index.gmi lists every post, and each post gets its
# own .gmi file inside a directory named after the numeric prefix of its URL.
# The index is opened once for the whole run rather than being reopened in
# append mode for every post.
with open("index.gmi", "w", encoding="utf-8") as index:
    index.write("# Tourmentine's blog-to-gemlog\n\n")

    for post_url, post_title, post_content_xhtml in myresult:
        # The SQL LIKE filter only guarantees three slashes in the URL, not
        # the numeric prefix the regexes expect, so skip what cannot be parsed
        # instead of failing with IndexError.
        paths = path_regex.findall(post_url)
        filenames = filename_regex.findall(post_url)
        if not paths or not filenames:
            print(f"Skipping post with unexpected URL {post_url}")
            continue

        path = paths[0]
        path_with_dashes = path.replace("/", "-")
        # The post slug is percent-encoded twice, as in the original script.
        filename = quote(quote(filenames[0]))

        # The index entry is written before the post file is (possibly)
        # created, so existing posts are listed too.
        index.write(f'=> {path}/{filename}.gmi {path_with_dashes} - {post_title}\n')

        try:
            os.makedirs(path, exist_ok=True)
        except OSError:
            # Without the directory the post file cannot be written; report
            # it and carry on with the remaining posts.
            print(f"Creation of the directory {path} failed")
            continue

        # NOTE(review): the double quotes are literally part of the on-disk
        # file name, while the index link above contains none - confirm that
        # this mismatch is intended before changing either side.
        post_file = f'{path}/"{filename}".gmi'
        # Existing post files are never overwritten.
        if not os.path.isfile(post_file):
            print(f"creating {post_file}")
            with open(post_file, "w", encoding="utf-8") as post:
                post.write(f"# {post_title}\n\n")
                post.write(f"Publié le {path}\n\n")
                post.write(html2markdown.convert(post_content_xhtml))
                post.write("\n\n=> /blog/ Retour au menu du blog")

    # NOTE(review): this link line has a single token after "=>" - confirm
    # whether a target URL is missing in front of the label.
    index.write("\n=> Retour")