318 lines
10 KiB
PHP
Executable file
318 lines
10 KiB
PHP
Executable file
#!/usr/local/bin/php -c /etc/php.ini
|
|
|
|
<?php
|
|
/***************************************************
** html2rss.php - 13/07/02 n@tourmentine.com
***************************************************
* 20/10/04: added the RSS 2.0 and Atom 0.3 formats
***************************************************
* takes a configuration file as its parameter;
* according to its settings, downloads the news,
* parses them, then writes the data to the
* backend files (txt, RSS 0.91, RSS 2.0, Atom 0.3)
***************************************************/
|
|
|
|
|
|
|
|
/**
 * write2txt() - write the selected news items to the plain-text backend.
 *
 * Output format (one record per news item):
 *
 *   %%
 *   title
 *   url
 *   %%
 *   title
 *   (...)
 *
 * Nothing is written unless $tableau holds at least two items (same
 * threshold as the other writers in this file).
 *
 * @param array $inifiledata Settings from the config file. Keys read here:
 *                           'filename' (output base name), 'baseurl'
 *                           (prefix for links that are not absolute),
 *                           'offset' (number of leading items to skip,
 *                           default 0) and 'NbMax' (maximum number of items
 *                           to write, default 10 as in the main script).
 * @param array $tableau     News items; each item is an array with at least
 *                           the keys 'title' and 'link'.
 * @return void
 */
function write2txt($inifiledata,$tableau)
{
    // Read the settings explicitly instead of extract(): a key missing from
    // the config then gets a default rather than an undefined variable.
    $filename = isset($inifiledata['filename']) ? $inifiledata['filename'] : '';
    $baseurl  = isset($inifiledata['baseurl']) ? (string) $inifiledata['baseurl'] : '';
    $offset   = isset($inifiledata['offset']) ? max(0, (int) $inifiledata['offset']) : 0;
    $NbMax    = (isset($inifiledata['NbMax']) && $inifiledata['NbMax'] !== '')
              ? max(0, (int) $inifiledata['NbMax'])
              : 10;

    if (!is_array($tableau))
        $tableau = array();
    $totalnews = count($tableau);

    $outputfile = "/home/www/website/backends/txt/$filename.txt";

    if ($totalnews <= 1)
    {
        echo "$outputfile non créé : nombre de news insuffisant ($totalnews)\n";
        return;
    }

    // array_slice() works on positions, not keys, so it copes with gaps in
    // the item numbering and never reads past the end of the array, even
    // when offset + NbMax exceeds the number of items.
    $selection = array_slice($tableau, $offset, $NbMax);

    $backend = fopen($outputfile,"w");
    if ($backend === false)
    {
        echo "$outputfile non créé : ouverture en écriture impossible\n";
        return;
    }

    foreach ($selection as $item)
    {
        $title = isset($item["title"]) ? $item["title"] : "";
        $link  = isset($item["link"]) ? $item["link"] : "";

        // Only links without an http(s) scheme at their start are relative.
        if ($baseurl !== '' && !preg_match('#^https?://#i', $link))
            $link = $baseurl.$link;

        fwrite($backend,"%%\n");
        fwrite($backend,$title."\n");
        fwrite($backend,$link."\n");
    }
    fclose($backend);

    echo "$outputfile généré (".count($selection)." enregistrements).\n";
}
|
|
|
|
|
|
/**
 * write2rss091() - write the selected news items as an RSS 0.91 feed.
 *
 * Format: see the RSS 0.91 specification,
 * http://my.netscape.com/publish/formats/rss-spec-0.91.html
 *
 * Nothing is written unless $tableau holds at least two items.
 *
 * @param array $inifiledata Settings from the config file. Keys read here:
 *                           'filename', 'titlesite', 'linksite',
 *                           'descriptionsite', 'languagesite', 'baseurl'
 *                           (prefix for links that are not absolute),
 *                           'offset' (leading items to skip, default 0) and
 *                           'NbMax' (maximum number of items, default 10).
 * @param array $tableau     News items; each item is an array with at least
 *                           the keys 'title' and 'link'.
 * @return void
 */
function write2rss091($inifiledata,$tableau)
{
    // The feed declares ISO-8859-1, so escape with that charset explicitly:
    // the default charset of htmlspecialchars()/htmlentities() varies between
    // PHP versions and a UTF-8 default rejects Latin-1 bytes.
    $charset = 'ISO-8859-1';

    $filename = isset($inifiledata['filename']) ? $inifiledata['filename'] : '';
    $baseurl  = isset($inifiledata['baseurl']) ? (string) $inifiledata['baseurl'] : '';
    $offset   = isset($inifiledata['offset']) ? max(0, (int) $inifiledata['offset']) : 0;
    $NbMax    = (isset($inifiledata['NbMax']) && $inifiledata['NbMax'] !== '')
              ? max(0, (int) $inifiledata['NbMax'])
              : 10;

    // Channel-level fields come from the config file; double_encode is off
    // so that a value already written with entities is not escaped twice.
    $channel = array();
    foreach (array('titlesite','linksite','descriptionsite','languagesite') as $key)
    {
        $raw = isset($inifiledata[$key]) ? (string) $inifiledata[$key] : '';
        $channel[$key] = htmlspecialchars($raw, ENT_QUOTES, $charset, false);
    }

    $entete = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n\n"
            . "<!DOCTYPE rss PUBLIC \"-//Netscape Communications//DTD RSS 0.91//EN\"\n"
            . "\"http://my.netscape.com/publish/formats/rss-0.91.dtd\">\n\n"
            . "<rss version=\"0.91\">\n\n"
            . "<channel>\n\n"
            . "<title>".$channel['titlesite']."</title>\n"
            . "<link>".$channel['linksite']."</link>\n"
            . "<description>".$channel['descriptionsite']."</description>\n"
            . "<language>".$channel['languagesite']."</language>\n\n";

    $outputfile = "/home/www/website/backends/xml/$filename.rss";

    if (!is_array($tableau))
        $tableau = array();
    $totalnews = count($tableau);

    if ($totalnews <= 1)
    {
        echo "$outputfile non créé : nombre de news insuffisant ($totalnews)\n";
        return;
    }

    // Positional selection: immune to gaps in the item numbering and never
    // reads past the end of the array.
    $selection = array_slice($tableau, $offset, $NbMax);

    $backend = fopen($outputfile,"w");
    if ($backend === false)
    {
        echo "$outputfile non créé : ouverture en écriture impossible\n";
        return;
    }

    fwrite($backend,$entete);
    foreach ($selection as $item)
    {
        $title = isset($item["title"]) ? $item["title"] : "";
        $link  = isset($item["link"]) ? $item["link"] : "";

        // Only links without an http(s) scheme at their start are relative.
        if ($baseurl !== '' && !preg_match('#^https?://#i', $link))
            $link = $baseurl.$link;

        // htmlspecialchars() emits only entities that XML itself defines
        // (plus numeric references), unlike htmlentities().
        fwrite($backend," <item>\n");
        fwrite($backend," <title>".htmlspecialchars($title, ENT_QUOTES, $charset)."</title>\n");
        fwrite($backend," <link>".htmlspecialchars($link, ENT_QUOTES, $charset)."</link>\n");
        fwrite($backend," </item>\n\n");
    }
    fwrite($backend," </channel>\n\n</rss>");
    fclose($backend);

    echo "$outputfile généré (".count($selection)." enregistrements).\n";
}
|
|
|
|
/**
 * write2rss() - write the selected news items as an RSS 2.0 feed.
 *
 * Same contract as write2rss091(), but the channel additionally carries
 * <pubDate>, <lastBuildDate> (both the generation time) and <generator>.
 *
 * Nothing is written unless $tableau holds at least two items.
 *
 * @param array $inifiledata Settings from the config file. Keys read here:
 *                           'filename', 'titlesite', 'linksite',
 *                           'descriptionsite', 'languagesite', 'baseurl'
 *                           (prefix for links that are not absolute),
 *                           'offset' (leading items to skip, default 0) and
 *                           'NbMax' (maximum number of items, default 10).
 * @param array $tableau     News items; each item is an array with at least
 *                           the keys 'title' and 'link'.
 * @return void
 */
function write2rss($inifiledata,$tableau)
{
    // The feed declares ISO-8859-1, so escape with that charset explicitly:
    // the default charset of htmlspecialchars()/htmlentities() varies between
    // PHP versions and a UTF-8 default rejects Latin-1 bytes.
    $charset = 'ISO-8859-1';

    $filename = isset($inifiledata['filename']) ? $inifiledata['filename'] : '';
    $baseurl  = isset($inifiledata['baseurl']) ? (string) $inifiledata['baseurl'] : '';
    $offset   = isset($inifiledata['offset']) ? max(0, (int) $inifiledata['offset']) : 0;
    $NbMax    = (isset($inifiledata['NbMax']) && $inifiledata['NbMax'] !== '')
              ? max(0, (int) $inifiledata['NbMax'])
              : 10;

    // Channel-level fields come from the config file; double_encode is off
    // so that a value already written with entities is not escaped twice.
    $channel = array();
    foreach (array('titlesite','linksite','descriptionsite','languagesite') as $key)
    {
        $raw = isset($inifiledata[$key]) ? (string) $inifiledata[$key] : '';
        $channel[$key] = htmlspecialchars($raw, ENT_QUOTES, $charset, false);
    }

    // One timestamp for both date fields so they can never disagree.
    $now = date("r");

    $entete = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n\n"
            . "<rss version=\"2.0\">\n\n"
            . "<channel>\n\n"
            . "<title>".$channel['titlesite']."</title>\n"
            . "<link>".$channel['linksite']."</link>\n"
            . "<description>".$channel['descriptionsite']."</description>\n"
            . "<language>".$channel['languagesite']."</language>\n"
            . "<pubDate>".$now."</pubDate>\n"
            . "<lastBuildDate>".$now."</lastBuildDate>\n"
            . "<generator>WebLoom beta</generator>\n\n";

    $outputfile = "/home/www/website/backends/xml/$filename.rss";

    if (!is_array($tableau))
        $tableau = array();
    $totalnews = count($tableau);

    if ($totalnews <= 1)
    {
        echo "$outputfile non créé : nombre de news insuffisant ($totalnews)\n";
        return;
    }

    // Positional selection: immune to gaps in the item numbering and never
    // reads past the end of the array.
    $selection = array_slice($tableau, $offset, $NbMax);

    $backend = fopen($outputfile,"w");
    if ($backend === false)
    {
        echo "$outputfile non créé : ouverture en écriture impossible\n";
        return;
    }

    fwrite($backend,$entete);
    foreach ($selection as $item)
    {
        $title = isset($item["title"]) ? $item["title"] : "";
        $link  = isset($item["link"]) ? $item["link"] : "";

        // Only links without an http(s) scheme at their start are relative.
        if ($baseurl !== '' && !preg_match('#^https?://#i', $link))
            $link = $baseurl.$link;

        // htmlspecialchars() emits only entities that XML itself defines
        // (plus numeric references), unlike htmlentities().
        fwrite($backend," <item>\n");
        fwrite($backend," <title>".htmlspecialchars($title, ENT_QUOTES, $charset)."</title>\n");
        fwrite($backend," <link>".htmlspecialchars($link, ENT_QUOTES, $charset)."</link>\n");
        fwrite($backend," </item>\n\n");
    }
    fwrite($backend," </channel>\n\n</rss>");
    fclose($backend);

    echo "$outputfile généré (".count($selection)." enregistrements).\n";
}
|
|
|
|
/**
 * write2atom() - write the selected news items as an Atom 0.3 feed.
 *
 * Format: see the Atom 0.3 draft specification,
 * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-02.txt
 *
 * Every entry gets the generation time as both <modified> and <issued>.
 * Nothing is written unless $tableau holds at least two items.
 *
 * @param array $inifiledata Settings from the config file. Keys read here:
 *                           'filename', 'titlesite', 'linksite',
 *                           'descriptionsite', 'languagesite', 'baseurl'
 *                           (prefix for links that are not absolute),
 *                           'offset' (leading items to skip, default 0) and
 *                           'NbMax' (maximum number of items, default 10).
 * @param array $tableau     News items, passed by reference. Each item is an
 *                           array with the keys 'title' and 'link' and
 *                           optionally 'summary' and 'author' ("unknown" is
 *                           written when they are absent). The 'link' of
 *                           every written item is updated in place to its
 *                           absolute form.
 * @return void
 */
function write2atom($inifiledata,&$tableau)
{
    // The feed declares ISO-8859-1, so escape with that charset explicitly:
    // the default charset of htmlspecialchars()/htmlentities() varies between
    // PHP versions and a UTF-8 default rejects Latin-1 bytes.
    $charset = 'ISO-8859-1';

    $filename = isset($inifiledata['filename']) ? $inifiledata['filename'] : '';
    $baseurl  = isset($inifiledata['baseurl']) ? (string) $inifiledata['baseurl'] : '';
    $offset   = isset($inifiledata['offset']) ? max(0, (int) $inifiledata['offset']) : 0;
    $NbMax    = (isset($inifiledata['NbMax']) && $inifiledata['NbMax'] !== '')
              ? max(0, (int) $inifiledata['NbMax'])
              : 10;

    // Feed-level fields come from the config file; double_encode is off so
    // that a value already written with entities is not escaped twice.
    $channel = array();
    foreach (array('titlesite','linksite','descriptionsite','languagesite') as $key)
    {
        $raw = isset($inifiledata[$key]) ? (string) $inifiledata[$key] : '';
        $channel[$key] = htmlspecialchars($raw, ENT_QUOTES, $charset, false);
    }

    // One timestamp for the feed and all of its entries.
    $now = date("c");

    $entete = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n"
            . "<feed version=\"0.3\"\n"
            . "xmlns=\"http://purl.org/atom/ns#\"\n"
            . "xml:lang=\"".$channel['languagesite']."\">\n\n"
            . "<title>".$channel['titlesite']."</title>\n"
            . "<link rel=\"alternate\" type=\"text/html\" href=\"".$channel['linksite']."\" />\n"
            . "<generator url=\"http://tourmentine.com/\" version=\"beta\">WebLoom</generator>\n"
            . "<tagline>".$channel['descriptionsite']."</tagline>\n"
            . "<modified>".$now."</modified>\n\n";

    $outputfile = "/home/www/website/backends/atom/$filename.xml";

    $totalnews = is_array($tableau) ? count($tableau) : 0;

    if ($totalnews <= 1)
    {
        echo "$outputfile non créé : nombre de news insuffisant ($totalnews)\n";
        return;
    }

    // Positional selection (immune to gaps in the item numbering, never
    // reads past the end). Keys are preserved so that the absolute link can
    // be written back into the caller's array.
    $selection = array_slice($tableau, $offset, $NbMax, true);

    $backend = fopen($outputfile,"w");
    if ($backend === false)
    {
        echo "$outputfile non créé : ouverture en écriture impossible\n";
        return;
    }

    fwrite($backend,$entete);
    foreach ($selection as $key => $item)
    {
        $title   = isset($item["title"]) ? $item["title"] : "";
        $link    = isset($item["link"]) ? $item["link"] : "";
        // The main script only fills these when it could read the item page.
        $summary = isset($item["summary"]) ? $item["summary"] : "unknown";
        $author  = isset($item["author"]) ? $item["author"] : "unknown";

        // Only links without an http(s) scheme at their start are relative.
        if ($baseurl !== '' && !preg_match('#^https?://#i', $link))
        {
            $link = $baseurl.$link;
            $tableau[$key]["link"] = $link;
        }

        $xmllink = htmlspecialchars($link, ENT_QUOTES, $charset);

        fwrite($backend," <entry>\n");
        fwrite($backend," <title>".htmlspecialchars($title, ENT_QUOTES, $charset)."</title>\n");
        fwrite($backend," <id>".$xmllink."</id>\n");
        fwrite($backend," <link rel=\"alternate\" type=\"text/html\" href=\"".$xmllink."\" />\n");
        fwrite($backend," <summary>".htmlspecialchars($summary, ENT_QUOTES, $charset)."</summary>\n");
        fwrite($backend," <modified>".$now."</modified>\n");
        fwrite($backend," <issued>".$now."</issued>\n");
        fwrite($backend," <author><name>".htmlspecialchars($author, ENT_QUOTES, $charset)."</name></author>\n");
        fwrite($backend," </entry>\n\n");
    }
    fwrite($backend,"</feed>");
    fclose($backend);

    echo "$outputfile généré (".count($selection)." enregistrements).\n";
}
|
|
|
|
/*********************************************************************************************/
|
|
|
|
// ---------------------------------------------------------------------------
// Main script: load the config file named on the command line, fetch and
// clean the source page, extract the news items with the configured regex,
// then write the backend files.
// ---------------------------------------------------------------------------

if (!isset($argv[1]) || $argv[1] === "")
{
    fwrite(STDERR, "usage: html2rss.php <fichier de configuration>\n");
    exit(1);
}
$f=$argv[1];

// NOTE(review): the config file is executed as PHP code by include(); only
// trusted files should ever be placed in (or addressed through) this directory.
$configfile = "/home/www/website/modules/backends/conf/$f";
if (!is_file($configfile) || !is_readable($configfile))
{
    fwrite(STDERR, "$configfile : fichier de configuration illisible\n");
    exit(1);
}
include($configfile);

// These three settings are used unconditionally below and by the writers.
if (!isset($source, $grep, $filename))
{
    fwrite(STDERR, "$configfile : parametres source, grep ou filename manquants\n");
    exit(1);
}

// Defaults for the optional settings, so that compact() below never meets
// an undefined variable.
if (!isset($NbMax) || $NbMax=="")
    $NbMax = 10;
if (!isset($offset) || $offset === "")
    $offset = 0;
foreach (array('titlesite','linksite','descriptionsite','languagesite','baseurl') as $optional)
{
    if (!isset($$optional))
        $$optional = "";
}

$inidata = compact('source','filename','titlesite','linksite','descriptionsite','languagesite','baseurl','grep','offset','NbMax');

// Download the source page.
$data = file_get_contents($source);
if ($data === false)
{
    fwrite(STDERR, "$source : lecture impossible\n");
    exit(1);
}

// Clean it up. strtr() ignores the extra characters of the longer argument,
// so the replacement string needs one space per character to replace.
$data = strtr($data,"\n\0\r\t","    ");
// The former ereg_replace() calls (the ereg extension was removed in PHP 7)
// replaced accented characters by themselves; only this one changed anything.
// NOTE(review): looks like a mangled entity replacement - confirm it is needed.
$data = str_replace("ï;","ï",$data);
// Flags and charset are spelled out because the defaults differ between PHP
// versions; ISO-8859-1 matches the encoding declared by the generated feeds.
$data = html_entity_decode($data, ENT_COMPAT, 'ISO-8859-1');
$data = trim($data);

$res = array();
if (preg_match_all($grep,$data,$res,PREG_SET_ORDER) === false)
{
    fwrite(STDERR, "$configfile : expression reguliere grep invalide\n");
    exit(1);
}

// Items are numbered from 1; the counter only advances for stored items so
// that the numbering has no gaps.
$niouzes = array();
$index = 1;
foreach ($res as $elt)
{
    // In each match, group 1 is the link and group 2 the title.
    if (!isset($elt[1], $elt[2]) || $elt[1] == "" || $elt[2] == "")
        continue;

    $item = array(
        "link"    => $elt[1],
        "date"    => "unknown",
        "summary" => "unknown",
        "author"  => isset($author) ? $author : "unknown",
        "title"   => trim($elt[2]),
    );

    // NOTE(review): file_exists() cannot stat http:// URLs, so the details
    // below are presumably only collected when $baseurl is a local path - confirm.
    $itempath = $baseurl.$elt[1];
    $dataitem = file_exists($itempath) ? file_get_contents($itempath) : false;
    if ($dataitem !== false)
    {
        $dataitem = trim(strtr($dataitem,"\n\0\r","   "));

        // For each optional regex, group 1 of the first match is the value.
        if (isset($grepitemdate) && preg_match($grepitemdate,$dataitem,$found) && isset($found[1]))
            $item["date"] = $found[1];

        if (isset($grepitemsummary) && preg_match($grepitemsummary,$dataitem,$found) && isset($found[1]))
        {
            $text = strip_tags($found[1]);
            // Summaries are capped at 200 characters.
            $item["summary"] = (strlen($text) > 200) ? substr($text, 0, 200)."..." : $text;
        }

        // A fixed $author from the config takes precedence over the regex.
        if (!isset($author) && isset($grepitemauthor)
            && preg_match($grepitemauthor,$dataitem,$found) && isset($found[1]))
            $item["author"] = $found[1];
    }

    $niouzes[$index] = $item;
    $index++;
}

// Write the data. The text and RSS 0.91 backends are currently disabled:
//write2txt($inidata,$niouzes);
//write2rss091($inidata,$niouzes);
// No "&" at the call site: call-time pass-by-reference is a fatal error
// since PHP 5.4.
write2rss($inidata,$niouzes);
write2atom($inidata,$niouzes);
|
|
|
|
|
|
?>
|