various scripts for various tasks.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

317 lines
10 KiB

  1. #!/usr/bin/php -q
  2. <?php
  3. /***************************************************
  4. ** html2rss.php - 13/07/02 n@tourmentine.com
  5. ***************************************************
  6. * 20/10/04: ajout formats RSS 2.0 et Atom 0.3
  7. ***************************************************
  8. * prend en paramètre un fichier de config,
  9. * en fonction des paramètres, télécharge les news,
  10. * les parse, puis écrit les données dans les
  11. * fichiers backends (en txt et rss 0.91)
  12. ***************************************************/
  13. /**********************************************************************
  14. * write2txt()
  15. **********************************************************************
  16. * �crit les $NbMde prem�res valeurs de $tableau, savoir les
  17. * champs link et title (voir fonction parse_html())
  18. * dans le fichier $file, en ajoutant au besoin (si d�fini) la
  19. * variable $bu correspondant l'url de base ($baseurl dans le .ini)
  20. **********************************************************************
  21. * format: fichier de news au format texte
  22. *
  23. * %%
  24. * titre
  25. * url
  26. * %%
  27. * titre
  28. * (...)
  29. **********************************************************************/
  30. function write2txt($inifiledata,$tableau)
  31. {
  32. extract($inifiledata);
  33. echo $tableau[0]["title"];
  34. $totalnews = count($tableau);
  35. if ($NbMax > $totalnews)
  36. $NbMax = $totalnews;
  37. $outputfile = "/var/www/website/backends/txt/$filename.txt";
  38. if ($totalnews > 1)
  39. {
  40. $backend = fopen("$outputfile","w");
  41. for($n = $offset+1 ; $n <= $NbMax+$offset ; $n++)
  42. {
  43. fwrite ($backend,"%%\n");
  44. fwrite ($backend,$tableau[$n]["title"]."\n");
  45. if (!strstr($tableau[$n]["link"],"http://"))
  46. $tableau[$n]["link"]=$baseurl.$tableau[$n]["link"];
  47. fwrite ($backend,$tableau[$n]["link"]."\n");
  48. }
  49. fclose($backend);
  50. echo "$outputfile g�n�r� (".($n-1)." enregistrements).\n";
  51. }
  52. else echo "$outputfile non cr�� : nombre de news insuffisant ($totalnews)\n";
  53. }
  54. /**********************************************************************
  55. * write2rss091()
  56. **********************************************************************
  57. * �crit les $NbMaxde prem�res valeurs de $tableau, savoir les
  58. * champs link et title (voir fonction parse_html())
  59. * dans le fichier $file, en ajoutant au besoin (si d�fini) la
  60. * variable $bu correspondant l'url de base ($baseurl dans le .ini)
  61. **********************************************************************
  62. * format: voir sp�cificications RSS 0.91:
  63. * http://my.netscape.com/publish/formats/rss-spec-0.91.html
  64. **********************************************************************/
  65. function write2rss091($inifiledata,$tableau)
  66. {
  67. extract($inifiledata);
  68. $entete = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>
  69. <!DOCTYPE rss PUBLIC \"-//Netscape Communications//DTD RSS 0.91//EN\"
  70. \"http://my.netscape.com/publish/formats/rss-0.91.dtd\">
  71. <rss version=\"0.91\">
  72. <channel>
  73. <title>$titlesite</title>
  74. <link>$linksite</link>
  75. <description>$descriptionsite</description>
  76. <language>$languagesite</language>\n\n";
  77. $outputfile = "/var/www/website/backends/xml/$filename.rss";
  78. $totalnews = count($tableau);
  79. if ($NbMax > $totalnews)
  80. $NbMax = $totalnews;
  81. if ($totalnews > 1)
  82. {
  83. $backend = fopen("$outputfile","w");
  84. fwrite ($backend,$entete);
  85. for($n = $offset+1 ; $n <= $NbMax+$offset ; $n++)
  86. {
  87. fwrite ($backend," <item>\n");
  88. fwrite ($backend," <title>".$tableau[$n]["title"]."</title>\n");
  89. if (!strstr($tableau[$n]["link"],"http://") && isset($baseurl))
  90. $tableau[$n]["link"]=$baseurl.$tableau[$n]["link"];
  91. fwrite ($backend," <link>".htmlentities($tableau[$n]["link"])."</link>\n");
  92. fwrite ($backend," </item>\n\n");
  93. }
  94. fwrite ($backend," </channel>\n\n</rss>");
  95. fclose($backend);
  96. echo "$outputfile g�n�r� (".($n-1)." enregistrements).\n";
  97. }
  98. else echo "$outputfile non cr�� : nombre de news insuffisant ($totalnews)\n";
  99. }
  100. /**********************************************************************
  101. * write2rss()
  102. **********************************************************************
  103. * idem write2rss091, mais pour format 2.0 (!)
  104. **********************************************************************/
  105. function write2rss($inifiledata,$tableau)
  106. {
  107. extract($inifiledata);
  108. $entete = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>
  109. <rss version=\"2.0\">
  110. <channel>
  111. <title>$titlesite</title>
  112. <link>$linksite</link>
  113. <description>$descriptionsite</description>
  114. <language>$languagesite</language>
  115. <pubDate>".date("r")."</pubDate>
  116. <lastBuildDate>".date("r")."</lastBuildDate>
  117. <generator>WebLoom beta</generator>\n\n";
  118. $outputfile = "/var/www/website/backends/xml/$filename.rss";
  119. $totalnews = count($tableau);
  120. if ($NbMax > $totalnews)
  121. $NbMax = $totalnews;
  122. if ($totalnews > 1)
  123. {
  124. $backend = fopen("$outputfile","w");
  125. fwrite ($backend,$entete);
  126. for($n = $offset+1 ; $n <= $NbMax+$offset ; $n++)
  127. {
  128. fwrite ($backend," <item>\n");
  129. fwrite ($backend," <title>".$tableau[$n]["title"]."</title>\n");
  130. if (!strstr($tableau[$n]["link"],"http://") && isset($baseurl))
  131. $tableau[$n]["link"]=$baseurl.$tableau[$n]["link"];
  132. fwrite ($backend," <link>".htmlentities($tableau[$n]["link"])."</link>\n");
  133. fwrite ($backend," </item>\n\n");
  134. }
  135. fwrite ($backend," </channel>\n\n</rss>");
  136. fclose($backend);
  137. echo "$outputfile g�n�r� (".($n-1)." enregistrements).\n";
  138. }
  139. else echo "$outputfile non cr�� : nombre de news insuffisant ($totalnews)\n";
  140. }
  141. /**********************************************************************
  142. * write2atom()
  143. **********************************************************************
  144. * �crit les $NbMaxde prem�res valeurs de $tableau, savoir les
  145. * champs link et title (voir fonction parse_html())
  146. * dans le fichier $file, en ajoutant au besoin (si d�fini) la
  147. * variable $bu correspondant l'url de base ($baseurl dans le .ini)
  148. **********************************************************************
  149. * format: voir le brouillon des sp�cificications Atom 0.3:
  150. * http://www.ietf.org/internet-drafts/draft-ietf-atompub-format-02.txt
  151. **********************************************************************/
  152. function write2atom($inifiledata,&$tableau)
  153. {
  154. extract($inifiledata);
  155. $entete = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>
  156. <feed version=\"0.3\"
  157. xmlns=\"http://purl.org/atom/ns#\"
  158. xml:lang=\"$languagesite\">
  159. <title>$titlesite</title>
  160. <link rel=\"alternate\" type=\"text/html\" href=\"$linksite\" />
  161. <generator url=\"http://tourmentine.com/\" version=\"beta\">WebLoom</generator>
  162. <tagline>$descriptionsite</tagline>
  163. <modified>".date("c")."</modified>\n\n";
  164. $outputfile = "/var/www/website/backends/atom/$filename.xml";
  165. $totalnews = count($tableau);
  166. if ($NbMax > $totalnews)
  167. $NbMax = $totalnews;
  168. if ($totalnews > 1)
  169. {
  170. $backend = fopen("$outputfile","w");
  171. fwrite ($backend,$entete);
  172. for($n = $offset+1 ; $n <= $NbMax+$offset ; $n++)
  173. {
  174. //print_r($tableau[$n]);
  175. fwrite ($backend," <entry>\n");
  176. fwrite ($backend," <title>".$tableau[$n]["title"]."</title>\n");
  177. if (!strstr($tableau[$n]["link"],"http://") && isset($baseurl))
  178. $tableau[$n]["link"]=$baseurl.$tableau[$n]["link"];
  179. fwrite ($backend," <id>".htmlentities($tableau[$n]["link"])."</id>\n");
  180. fwrite ($backend," <link rel=\"alternate\" type=\"text/html\" href=\"".htmlentities($tableau[$n]["link"])."\" />\n");
  181. fwrite ($backend," <summary>".$tableau[$n]["summary"]."</summary>\n");
  182. fwrite ($backend," <modified>".date("c")."</modified>\n");
  183. fwrite ($backend," <issued>".date("c")."</issued>\n");
  184. fwrite ($backend," <author><name>".$tableau[$n]["author"]."</name></author>\n");
  185. fwrite ($backend," </entry>\n\n");
  186. }
  187. fwrite ($backend,"</feed>");
  188. fclose($backend);
  189. echo "$outputfile g�n�r� (".($n-1)." enregistrements).\n";
  190. }
  191. else echo "$outputfile non cr�� : nombre de news insuffisant ($totalnews)\n";
  192. }
  193. /*********************************************************************************************/
  194. $f=$argv[1];
  195. include("/var/www/website/modules/backends/conf/$f");
  196. if (!isset($NbMax) || $NbMax=="")
  197. $NbMax = 10;
  198. $inidata = compact('source','filename','titlesite','linksite','descriptionsite','languagesite','baseurl','grep','offset','NbMax');
  199. $data = implode("",file($source));
  200. // le nettoie
  201. $data = strtr($data,"\n\0\r\t"," ");
  202. /*$data = ereg_replace("&eacute;","",$data);
  203. $data = ereg_replace("&ugrave;","",$data);
  204. $data = ereg_replace("&agrave;","",$data);
  205. $data = ereg_replace("&egrave;","",$data);
  206. $data = ereg_replace("&ecirc;","",$data);
  207. $data = ereg_replace("&icirc;","",$data);
  208. $data = ereg_replace("&ocirc;","",$data);
  209. $data = ereg_replace("&iuml;;","",$data);
  210. $data = ereg_replace("&amp;","&",$data);*/
  211. $data = html_entity_decode($data);
  212. $data = trim($data);
  213. preg_match_all($grep,$data,$res,PREG_SET_ORDER);
  214. $index = 1;
  215. foreach ($res as $elt)
  216. {
  217. if ($elt[1] != "" && $elt[2] != "")
  218. {
  219. $niouzes[$index]["link"] = $elt[1];
  220. if (file_exists($baseurl.$elt[1]))
  221. {
  222. $dataitem = implode("",file($baseurl.$elt[1]));
  223. $dataitem = strtr($dataitem,"\n\0\r"," ");
  224. $dataitem = trim($dataitem);
  225. if (isset($grepitemdate))
  226. {
  227. preg_match_all($grepitemdate,$dataitem,$resitem,PREG_SET_ORDER);
  228. $niouzes[$index]["date"] = $resitem[0][1];
  229. }
  230. else
  231. $niouzes[$index]["date"] = "unknown";
  232. if (isset($grepitemsummary))
  233. {
  234. preg_match_all($grepitemsummary,$dataitem,$resitem,PREG_SET_ORDER);
  235. if (strlen(strip_tags($resitem[0][1])) > 200)
  236. $niouzes[$index]["summary"] = substr(strip_tags($resitem[0][1]), 0, 200)."...";
  237. else
  238. $niouzes[$index]["summary"] = strip_tags($resitem[0][1]);
  239. }
  240. else
  241. $niouzes[$index]["summary"] = "unknown";
  242. if (isset($author))
  243. {
  244. $niouzes[$index]["author"] = $author;
  245. }
  246. else if (isset($grepitemauthor))
  247. {
  248. preg_match_all($grepitemauthor,$dataitem,$resitem,PREG_SET_ORDER);
  249. $niouzes[$index]["author"] = $resitem[0][1];
  250. }
  251. else
  252. $niouzes[$index]["author"] = "unknown";
  253. }
  254. $niouzes[$index]["title"] = trim($elt[2]);
  255. }
  256. $index++;
  257. }
  258. // �crit les donn�es
  259. //write2txt($inidata,$niouzes);
  260. //write2rss091($inidata,$niouzes);
  261. write2rss($inidata,&$niouzes);
  262. write2atom($inidata,$niouzes);
  263. ?>