#!/usr/bin/env python
"""Fetch (almost) all blog posts from a Dotclear database and export them.

The script reads the database credentials from the Dotclear ``config.php``,
selects every post whose URL contains at least three slashes, and for each
post whose URL looks like ``<digits>/<digits>/<digits>/<slug>``:

* writes ``<digits>/<digits>/<digits>/<slug>.gmi`` (title, publication line,
  post body converted with ``html2markdown``) unless that file already
  exists;
* adds one link line to ``index.gmi``, which is rebuilt on every run.
"""
# TODO: use rss/atom feed instead
# TODO: convert to proper Gemtext

import os
import re
from urllib.parse import quote

import html2markdown
import mysql.connector

CONFIG_PATH = "/home/www/dotclear/inc/config.php"
INDEX_PATH = "index.gmi"

POSTS_QUERY = (
    "SELECT post_url, post_title, post_content_xhtml "
    "FROM dc_post WHERE post_url LIKE '%/%/%/%' ORDER BY post_id DESC"
)

path_regex = re.compile(r'\d+/\d+/\d+')
date_regex = re.compile(r'\d+-\d+-\d+')  # not referenced below
filename_regex = re.compile(r'\d+/\d+/\d+/(.*)')
db_password_regex = re.compile("'DC_DBPASSWORD','(.*)'")
db_host_regex = re.compile("'DC_DBHOST','(.*)'")
db_name_regex = re.compile("'DC_DBNAME','(.*)'")
db_user_regex = re.compile("'DC_DBUSER','(.*)'")


def _config_value(regex, config_text, name):
    """Return the first value captured by *regex* in *config_text*.

    Args:
        regex: compiled pattern with exactly one capturing group.
        config_text: full text of the Dotclear configuration file.
        name: setting name, used only in the error message.

    Raises:
        ValueError: if the pattern does not occur in *config_text*.
    """
    match = regex.search(config_text)
    if match is None:
        raise ValueError(f"{name} not found in {CONFIG_PATH}")
    return match.group(1)


def fetch_posts():
    """Return ``(post_url, post_title, post_content_xhtml)`` rows, newest first.

    Credentials are extracted from ``CONFIG_PATH``.  The connection is closed
    even when the query fails.
    """
    with open(CONFIG_PATH, "r", encoding="utf-8") as config_file:
        config_text = config_file.read()

    connection = mysql.connector.connect(
        host=_config_value(db_host_regex, config_text, "DC_DBHOST"),
        user=_config_value(db_user_regex, config_text, "DC_DBUSER"),
        password=_config_value(db_password_regex, config_text, "DC_DBPASSWORD"),
        database=_config_value(db_name_regex, config_text, "DC_DBNAME"),
    )
    try:
        cursor = connection.cursor()
        cursor.execute(POSTS_QUERY)
        return cursor.fetchall()
    finally:
        connection.close()


def write_post(post_file, path, title, content_xhtml):
    """Write one post to *post_file* unless that file already exists.

    Args:
        post_file: destination file path.
        path: ``<digits>/<digits>/<digits>`` prefix of the post URL, printed
            after "Publié le" (presumably the publication date).
        title: post title, written as the top-level heading.
        content_xhtml: XHTML body, converted with ``html2markdown.convert``.
    """
    if os.path.isfile(post_file):
        return
    print(f"creating {post_file}")
    with open(post_file, "w", encoding="utf-8") as out:
        out.write(f"# {title}\n\n")
        out.write(f"Publié le {path}\n\n")
        out.write(html2markdown.convert(content_xhtml))
        out.write("\n\n=> /blog/ Retour au menu du blog")


def main():
    """Rebuild ``index.gmi`` and export every post that is not yet on disk."""
    posts = fetch_posts()

    # The index is opened once and rewritten from scratch on every run.
    with open(INDEX_PATH, "w", encoding="utf-8") as index:
        index.write("# Tourmentine's blog-to-gemlog\n\n")

        for post_url, title, content_xhtml in posts:
            path_match = path_regex.search(post_url)
            filename_match = filename_regex.search(post_url)
            if path_match is None or filename_match is None:
                # The SQL LIKE filter only guarantees three slashes, not
                # the <digits>/<digits>/<digits>/<slug> shape.
                print(f"Skipping post with unexpected URL {post_url!r}")
                continue
            path = path_match.group(0)
            filename = filename_match.group(1)

            try:
                os.makedirs(path, exist_ok=True)
            except OSError:
                # Without the directory the post cannot be written, so do
                # not list it in the index either.
                print(f"Creation of the directory {path} failed")
                continue

            # NOTE(review): the slug is percent-encoded twice, as in the
            # original script; confirm the serving side expects that.
            link_name = quote(quote(filename))
            label = path.replace("/", "-")
            index.write(f"=> {path}/{link_name}.gmi {label} - {title}\n")

            # The file name must match the index link, so the slug is used
            # as-is (no literal double quotes around it).
            write_post(f"{path}/{filename}.gmi", path, title, content_xhtml)

        index.write("\n=> Retour")


if __name__ == "__main__":
    main()