#!/usr/bin/env python
"""Fetch (almost) all blog posts from a Dotclear database and export them.

The database credentials are read from the Dotclear ``config.php``.  Every
post whose URL contains at least three slashes is selected (ordered by
descending ``post_id``) and exported into the current working directory:

* ``index.gmi`` is rewritten from scratch with one link line per post;
* ``<digits>/<digits>/<digits>/<quoted name>.gmi`` receives the post body,
  converted from XHTML to Markdown.  Post files that already exist are left
  untouched.
"""
# TODO: use rss/atom feed instead
# TODO: convert to proper Gemtext

import html2markdown
import mysql.connector
import re
import os
from urllib.parse import quote

CONFIG_PATH = "/home/www/dotclear/inc/config.php"
INDEX_PATH = "index.gmi"
POSTS_QUERY = (
    "SELECT post_url, post_title, post_content_xhtml FROM dc_post "
    "WHERE post_url LIKE '%/%/%/%' ORDER BY post_id DESC"
)

# Raw strings: '\d' in a plain literal is an invalid escape sequence.
path_regex = re.compile(r'\d+/\d+/\d+')
date_regex = re.compile(r'\d+-\d+-\d+')  # currently unused by this script
filename_regex = re.compile(r'\d+/\d+/\d+/(.*)')
db_password_regex = re.compile(r"'DC_DBPASSWORD','(.*)'")
db_host_regex = re.compile(r"'DC_DBHOST','(.*)'")
db_name_regex = re.compile(r"'DC_DBNAME','(.*)'")
db_user_regex = re.compile(r"'DC_DBUSER','(.*)'")


def _config_value(pattern, config_text):
    """Return the first value captured by *pattern* in *config_text*.

    Exits with an explicit message when the setting is absent, instead of
    failing with an anonymous ``IndexError``.
    """
    match = pattern.search(config_text)
    if match is None:
        raise SystemExit(
            "Setting matching %s not found in %s" % (pattern.pattern, CONFIG_PATH)
        )
    return match.group(1)


def _fetch_posts(config_text):
    """Return all ``(post_url, post_title, post_content_xhtml)`` rows.

    The connection parameters are extracted from *config_text*.  The cursor
    and the connection are always closed, even if the query fails.
    """
    connection = mysql.connector.connect(
        host=_config_value(db_host_regex, config_text),
        user=_config_value(db_user_regex, config_text),
        password=_config_value(db_password_regex, config_text),
        database=_config_value(db_name_regex, config_text),
    )
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(POSTS_QUERY)
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()


def _write_post(post_path, title, published, content_xhtml):
    """Write a single post to *post_path*.

    The conversion runs before the file is opened so that a conversion error
    does not leave behind an empty file that later runs would skip.
    """
    body = html2markdown.convert(content_xhtml)
    with open(post_path, "w", encoding="utf-8") as post_file:
        post_file.write("# %s\n\n" % title)
        # The accented character was garbled in the original literal.
        post_file.write("Publié le %s\n\n" % published)
        post_file.write(body)
        post_file.write("\n\n=> /blog/ Retour au menu du blog")


def main():
    """Export the posts and (re)build ``index.gmi``."""
    with open(CONFIG_PATH, "r") as config_file:
        config_text = config_file.read()

    posts = _fetch_posts(config_text)

    # The index is opened once instead of being reopened for every post.
    with open(INDEX_PATH, "w", encoding="utf-8") as index_file:
        index_file.write("# Tourmentine's blog-to-gemlog\n\n")

        for post_url, title, content_xhtml in posts:
            path_match = path_regex.search(post_url)
            filename_match = filename_regex.search(post_url)
            if path_match is None or filename_match is None:
                # The SQL LIKE filter only guarantees three slashes, not digits.
                print("Skipping post with unexpected URL %s" % post_url)
                continue

            path = path_match.group(0)
            quoted_name = quote(filename_match.group(1))
            post_path = "%s/%s.gmi" % (path, quoted_name)

            # The file on disk is named with literal percent-escapes, so the
            # link has to escape the '%' signs themselves (double quoting).
            index_file.write(
                "=> %s/%s.gmi %s - %s\n"
                % (path, quote(quoted_name), path.replace("/", "-"), title)
            )

            try:
                os.makedirs(path, exist_ok=True)
            except OSError:
                print("Creation of the directory %s failed" % path)
                # Without its directory the post file cannot be written.
                continue

            if not os.path.isfile(post_path):
                print("creating %s" % post_path)
                _write_post(post_path, title, path, content_xhtml)

        index_file.write("\n=> Retour")


if __name__ == "__main__":
    main()