gemini/blog/index.gen.py

61 lines
1.8 KiB
Python
Executable File

#!/usr/bin/env python
"""Fetch (almost) all blog posts from the Dotclear database and export them in Markdown format.

TODO: use the RSS/Atom feed instead.
TODO: convert to proper Gemtext.
"""
import html2markdown
import mysql.connector
import re
import os
from urllib.parse import quote
# Patterns are compiled once at module level.  They are written as raw
# strings so that regex escapes such as \d are not interpreted as (invalid)
# Python string escapes.

# First "<digits>/<digits>/<digits>" run in a post_url; the export loop uses
# it as the output directory of the post.
path_regex = re.compile(r'\d+/\d+/\d+')
# "<digits>-<digits>-<digits>"; not referenced by the visible code.
date_regex = re.compile(r'\d+-\d+-\d+')
# Captures everything that follows "<digits>/<digits>/<digits>/" in a post_url.
filename_regex = re.compile(r'\d+/\d+/\d+/(.*)')
# Each pattern below captures the value in a 'DC_DB...','<value>' pair of the
# Dotclear config.php text.  ".*" is greedy and does not cross newlines, so
# the capture runs up to the last single quote on the same line.
db_password_regex = re.compile(r"'DC_DBPASSWORD','(.*)'")
db_host_regex = re.compile(r"'DC_DBHOST','(.*)'")
db_name_regex = re.compile(r"'DC_DBNAME','(.*)'")
db_user_regex = re.compile(r"'DC_DBUSER','(.*)'")
# Read the Dotclear configuration as plain text; the database settings are
# pulled out of it with the db_*_regex patterns.
with open("/home/www/dotclear/inc/config.php", "r") as f:
    c = f.read()

# findall(...)[0] raises IndexError when a setting is absent from the config.
mydb = mysql.connector.connect(
    host=db_host_regex.findall(c)[0],
    user=db_user_regex.findall(c)[0],
    password=db_password_regex.findall(c)[0],
    database=db_name_regex.findall(c)[0]
)
try:
    mycursor = mydb.cursor()
    try:
        # Select posts whose URL contains at least three slashes, highest
        # post_id first.  Each row is (post_url, post_title, post_content_xhtml).
        mycursor.execute("SELECT post_url, post_title, post_content_xhtml FROM dc_post WHERE post_url LIKE '%/%/%/%' ORDER BY post_id DESC")
        # All rows are loaded into memory, so the cursor and the connection
        # can be released before the export starts.
        myresult = mycursor.fetchall()
    finally:
        mycursor.close()
finally:
    mydb.close()
# Export every fetched post as "<path>/<filename>.gmi" and list it in
# index.gmi.  The index is opened once in "w" mode, which also discards any
# index left over from a previous run.  Files are written as UTF-8 explicitly
# because the output contains non-ASCII text ("Publié") and must not depend
# on the locale of the process.
with open("index.gmi", "w", encoding="utf-8") as index_file:
    # Each row is (post_url, post_title, post_content_xhtml), in query order.
    for post_url, post_title, post_content_xhtml in myresult:
        path_match = path_regex.search(post_url)
        filename_match = filename_regex.search(post_url)
        if path_match is None or filename_match is None:
            # The SQL LIKE filter only guarantees three slashes, not the
            # "<digits>/<digits>/<digits>/<name>" shape the patterns expect.
            print("skipping %s: unexpected post_url format" % post_url)
            continue
        path = path_match.group(0)
        filename = filename_match.group(1)

        try:
            # exist_ok avoids the check-then-create race of a separate exists().
            os.makedirs(path, exist_ok=True)
        except OSError:
            print("Creation of the directory %s failed" % path)
            # Without its directory the post cannot be written; skip it
            # rather than crash on the open() below.
            continue

        post_file = "%s/%s.gmi" % (path, quote(filename))
        print("creating %s" % post_file)
        with open(post_file, "w", encoding="utf-8") as f:
            f.write("# %s\n\n" % post_title)
            f.write("Publié le %s\n\n" % path)
            f.write(html2markdown.convert(post_content_xhtml))
            f.write("\n\n=> /blog/ Retour au menu du blog")

        # The index line is written only once the post file exists, so the
        # index never links to a post that failed to export.
        # NOTE(review): the link uses the raw filename while the file on disk
        # is named quote(filename); confirm this matches how the Gemini
        # server resolves percent-encoded request paths.
        index_file.write("=> %s/%s.gmi %s %s\n" % (path, filename, path, post_title))