#!/usr/bin/env python
# -*- coding: utf-8 -*-
################################################################################
"""Check a list of OPDS catalogs and e-mail an alert for those updated today.

Each non-comment line of ``opdslist.txt`` is a catalog URL.  The page is
fetched, its "Catalogue généré le <day> <month> <year>" stamp is parsed, and a
notification is sent through the local SMTP server when that date is today.
"""

import re
import smtplib
import time

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib2 import urlopen

# Generation stamp written into the catalog page; the capture stops at the
# first '<' so that markup following the date on the same line is not included.
_GENERATED_RE = re.compile(u'Catalogue généré le ([^<\n]*)<')

# "<day> <month token> <4-digit year>" inside the captured stamp.
_DATE_RE = re.compile(r'(\d{1,2})\s+(\S+)\s+(\d{4})')

# French abbreviated month names, keyed without the trailing dot.  Accent-less
# spellings are accepted as well.
_MONTHS = {
    u'janv': 1,
    u'févr': 2,
    u'fevr': 2,
    u'mars': 3,
    u'avr': 4,
    u'mai': 5,
    u'juin': 6,
    u'juil': 7,
    u'août': 8,
    u'aout': 8,
    u'sept': 9,
    u'oct': 10,
    u'nov': 11,
    u'déc': 12,
    u'dec': 12,
}


def _to_text(value):
    """Return *value* as text, decoding UTF-8 bytes when necessary."""
    if isinstance(value, bytes):
        return value.decode('utf-8', 'replace')
    return value


def _log(text):
    """Print *text*, encoding it first where ``print`` needs a byte string."""
    if not isinstance(text, str):
        # Python 2 unicode object: encode explicitly so output that is piped
        # or redirected does not fail on accented month names.
        text = text.encode('utf-8')
    print(text)


def _parse_catalog_date(text):
    """Return ``(year, month, day)`` parsed from *text*, or None if unreadable."""
    found = _DATE_RE.search(text)
    if found is None:
        return None
    day, month_name, year = found.groups()
    month = _MONTHS.get(month_name.rstrip('.').lower())
    if month is None:
        return None
    return (int(year), month, int(day))


def findstring(url):
    """Report whether the catalog at *url* was generated today.

    Sends an "OPDS Update" mail and returns True when the generation date
    found in the page equals the current local date.  Returns False when the
    date differs.  When the page cannot be fetched, the stamp is missing or
    the date cannot be read, an "OPDS Error" mail is sent and False is
    returned.

    Surrounding whitespace (such as the newline left on a line read from a
    file) is stripped from *url* before use.
    """
    url = _to_text(url).strip()

    try:
        response = urlopen(url)
        try:
            raw = response.read()
        finally:
            response.close()
    except (IOError, ValueError) as err:
        # One unreachable or malformed entry must not abort the whole harvest.
        sendmail('OPDS Error',
                 'could not fetch catalog ' + url + ' (' + _to_text(str(err)) + ')')
        return False

    stamp = _GENERATED_RE.search(_to_text(raw))
    if stamp is None:
        sendmail('OPDS Error', 'string not found at catalog ' + url)
        return False

    date_text = stamp.group(1).strip()
    _log(url + ' : ' + date_text)

    doc_date = _parse_catalog_date(date_text)
    if doc_date is None:
        sendmail('OPDS Error',
                 'unreadable date "' + date_text + '" at catalog ' + url)
        return False

    if doc_date != tuple(time.localtime()[0:3]):
        return False

    # Point the notification at the catalog's index page; fall back to the
    # URL itself when it has no "/_catalog/" component.
    root, marker, _ = url.rpartition('/_catalog/')
    index_url = root + '/_catalog/index.html' if marker else url
    sendmail('OPDS Update', 'catalog ' + index_url + ' updated!')
    return True


def sendmail(subject, corpus):
    """Send an alert e-mail with *subject* and body *corpus*.

    The message goes through the SMTP server at smtplib's default address
    (local host, standard SMTP port).  smtplib errors propagate to the caller.
    """
    fromaddr = "noreply@tourmentine.com"
    mto = "n@tourmentine.com"

    # One header per line and an empty line before the body.
    message = "\r\n".join([
        "From: No Reply <" + fromaddr + ">",
        "To: nico <" + mto + ">",
        "Subject: " + _to_text(subject),
        "MIME-Version: 1.0",
        "Content-Type: text/plain; charset=utf-8",
        "",
        _to_text(corpus),
        "",
    ])
    if not isinstance(message, bytes):
        message = message.encode('utf-8')

    smtp = smtplib.SMTP()
    try:
        smtp.connect()
        smtp.sendmail(fromaddr, mto, message)
        smtp.quit()
    finally:
        # Harmless after a successful quit(); releases the socket otherwise.
        smtp.close()


def main():
    """Check every catalog listed in ``opdslist.txt``."""
    print(time.strftime('%c') + ' : Starting harvest...\n')
    with open('opdslist.txt') as catalog_list:
        for line in catalog_list:
            entry = line.strip()
            # Skip blank lines and '#' comments.
            if not entry or entry.startswith('#'):
                continue
            findstring(entry)
    print('\n')


if __name__ == "__main__":
    main()