Diffs
Keires_Feed/trunk/src/Keires/ExceptionWrapper.php
@@ -0,0 +1,60 @@
+<?php
+/**
+ * Keires_Feed
+ *
+ * Feed Parser like XML_Feed_Parser
+ *
+ * PHP version 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package Keires_Feed
+ * @author KOYAMA Tetsuji <koyama@hoge.hoge.org>
+ * @copyright 2008-2009 KOYAMA Tetsuji
+ * @license http://www.php.net/license/3_0.txt PHP License 3.0
+ * @version svn: $Id$
+ * @link http://openpear.org/package/Keires_Feed
+ * @since File available since Release 0.1
+ */
+
+require_once 'PEAR.php';
+require_once 'PEAR/Exception.php';
+
+class Keires_ExceptionWrapper {
+
+ static public function init() {
+ static $initialized = false;
+ if (!$initialized) {
+ PEAR::setErrorHandling(PEAR_ERROR_CALLBACK,
+ array('Keires_ExceptionWrapper',
+ 'handleError'));
+ $initialized = true;
+ }
+ }
+
+ static public function wrap($obj) {
+ if (is_a($obj, 'PEAR')) {
+ $obj->setErrorHandling(PEAR_ERROR_CALLBACK,
+ array('Keires_ExceptionWrapper',
+ 'handleError'));
+ }
+ }
+
+ static public function handleError($err) {
+ throw self::factory($err);
+ }
+
+ static public function factory($err) {
+ // default exception class
+ $class = 'PEAR_Exception';
+
+ return new $class($err->getMessage(), $err->getCode());
+ }
+}
+
+?>
\ No newline at end of file
属性に変更があったパス: Keires_Feed/trunk/src/Keires/ExceptionWrapper.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native
Keires_Feed/trunk/src/Keires/FeedParser.php
@@ -0,0 +1,239 @@
+<?php
+/**
+ * Keires_Feed
+ *
+ * Feed Parser like XML_Feed_Parser
+ *
+ * PHP version 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package Keires_Feed
+ * @author KOYAMA Tetsuji <koyama@hoge.hoge.org>
+ * @copyright 2008-2009 KOYAMA Tetsuji
+ * @license http://www.php.net/license/3_0.txt PHP License 3.0
+ * @version svn: $Id$
+ * @link http://openpear.org/package/Keires_Feed
+ * @since File available since Release 0.1
+ *
+ * based on Rasmus Lerdorf's simple_rss.php
+ * ( http://lerdorf.com/php/simple_rss.phps
+ */
+
+require_once 'Keires/Feed.php';
+
+class Keires_FeedParser {
+
+ protected $xmlversion = '1.0';
+ protected $encoding = null;
+ protected $xml = null;
+ protected $feed = null;
+
+ public function __construct($xml, $options = null) {
+ $this->xml = $xml;
+ $this->analyze();
+ }
+
+ public function getFeed() {
+ return $this->feed;
+ }
+
+ static protected function firstLine(&$text) {
+ $p1 = strpos($text, "\r");
+ $p2 = strpos($text, "\n");
+ $p = FALSE;
+
+ if ($p1 === FALSE) {
+ $p = $p2;
+ } else {
+ if ($p2 === FALSE) {
+ $p = $p1;
+ } else {
+ $p = min($p1, $p2);
+ }
+ }
+ if ($p === FALSE) {
+ // no or long length line
+ return substr($text, 0, 256);
+ }
+ return substr($text, 0, $p);
+ }
+
+ protected function analyze() {
+ $xml = $this->xml;
+
+ $first_line = $this->firstLine($xml);
+ $nmatch = preg_match('!<?xml version=[\'"](.+)[\'"] encoding=[\'"](.+)[\'"]\s*\?>!i',
+ $first_line, $matches);
+ if ($nmatch != 1) {
+ throw new Keires_Exception('Invalid XML document',
+ KEIRES_ERROR_INVALID_XML);
+ }
+
+ $this->xmlversion = $matches[1];
+ $this->encoding = $matches[2];
+ }
+
+ public function parse() {
+ $options = array(
+ 'version' => $this->xmlversion,
+ 'encoding' => $this->encoding,
+ );
+ $feed = self::parseXML($this->xml, $options);
+ $this->feed = $feed;
+ }
+
+ public function type() {
+ if (empty($this->feed)) {
+ throw new Keires_Exception('feed is not parsed yet',
+ KEIRES_ERROR_UNPARSED);
+ }
+ return $this->feed['_type'];
+ }
+
+ public function items() {
+ if (empty($this->feed)) {
+ throw new Keires_Exception('feed not parsed yet',
+ KEIRES_ERROR_UNPARSED);
+ }
+ return $this->feed['item'];
+ }
+
+ static public function feedType($dom) {
+ $type = null;
+ foreach ($dom->childNodes as $child) {
+ $node = $child->nodeName;
+ switch ($node) {
+ case 'rss':
+ $type = 'rss';
+ break;
+ case 'feed':
+ $type = 'atom';
+ break;
+ default:
+ if (strncmp($node, 'rdf', 3) === 0) {
+ $type = 'rss';
+ }
+ }
+ if (!empty($type)) {
+ break;
+ }
+ }
+ return $type;
+ }
+
+ static public function parseXML($input_xml, $options) {
+ $version = array_val($options, 'version', '1.0');
+ $encoding = array_val($options, 'encoding', 'UTF-8');
+
+ $dom = new DOMDocument($version, $encoding);
+
+ $result = $dom->loadXML($input_xml);
+ if ($result === FALSE) {
+ throw new Keires_Exception('DOMDocument load error',
+ KEIRES_ERROR_DOM_LOADING);
+ }
+ $feedtype = self::feedType($dom);
+
+ // Pick out the namespaces that apply to this doc.
+ // We need to do this from DOM because simplexml does't see the
+ // special xmlns attributes because of the way libxml2 handles them.
+ $xpath = new DOMXPath($dom);
+ $ns = array(''=>NULL);
+ foreach($xpath->query("namespace::*") as $v) {
+ if($v->localName!='xml') $ns[$v->localName] = $v->nodeValue;
+ }
+ $atom10 = $atom03 = false;
+ if ($feedtype != 'rss') {
+ if (in_array('http://www.w3.org/2005/Atom', $ns)) {
+ $atom10 = true;
+ unset($ns['xmlns']);
+ } else if (in_array('http://purl.org/atom/ns#', $ns)) {
+ $atom03 = true;
+ unset($ns['xmlns']);
+ }
+ }
+
+ // Ok, now we can switch to simplexml
+ $xml = simplexml_import_dom($dom);
+ if ($xml === null) {
+ throw new Keires_Exception('XML format error',
+ KEIRES_ERROR_INVALID_XML);
+ }
+ $feed = array();
+
+ // Pull out the root attributes - usually just version
+ foreach($xml->attributes() as $k=>$v) $feed[$k] = (string)$v;
+
+ // We will deal with the items separately, so start by only looking
+ // at the stuff leading up to the items checking each namespace.
+ $rss1 = false;
+ if ($atom10 || $atom03) {
+ $top = $xml;
+ } else {
+ $top = $xml->channel;
+ }
+ foreach($ns as $alias=>$uri) foreach($top->children($uri) as $key=>$val) {
+ if($key=="item" || $key=="entry") continue;
+ if($key=="items") {
+ $rss1 = true; continue;
+ }
+ if(!$val->children()) {
+ $feed[$key][0] = (string)$val;
+ foreach($ns as $a=>$u) foreach($val->attributes($u) as $at=>$atv) {
+ $feed[$key][$at] = (string)$atv;
+ }
+ } else {
+ foreach($val->children() as $k=>$v) {
+ $feed[$key][$k] = (string)$v;
+ foreach($v->attributes() as $at=>$atv) {
+ $feed[$k][$at] = (string)$atv;
+ }
+ }
+ }
+ }
+
+ // Now we deal with the items
+ // Atom and RSS1 have the feed items a level higher than RSS2
+ $i = 0;
+ if($rss1) { $feed['_type']='rss1.0'; $items = $xml->item; }
+ else if($atom10) { $feed['_type']='atom1.0'; $items = $xml->entry; }
+ else if($atom03) { $feed['_type']='atom0.3'; $items = $xml->entry; }
+ else {
+ if($feed['version']=='2.0') $feed['_type']='rss2.0';
+ else if($feed['version']=='0.91') $feed['_type']='rss0.91';
+ $items = $xml->channel->item;
+ }
+ foreach($items as $key=>$val) {
+ foreach($ns as $a=>$u) foreach($val->attributes($u) as $at=>$atv) {
+ $feed['item'][$i][$at] = (string)$atv;
+ }
+ foreach($ns as $alias=>$uri) {
+ foreach($val->children($uri) as $k=>$v) {
+ $feed['item'][$i][$k][0] = (string)$v;
+ foreach($v->attributes() as $at=>$atv) {
+ $at_val = (string)$atv;
+ if ($atom10 || $atom03) {
+ $feed['item'][$i][$k][$at][] = $at_val;
+ // Don't even try parsing this stuff, just pass it through.
+ if($at_val=='xhtml' || $at_val=='html' || $at_val=='text') {
+ $tags = $v->children();
+ $feed['item'][$i][$k]['text'] = $tags->asXML();
+ }
+ } else $feed['item'][$i][$k][$at] = $at_val;
+
+ }
+ }
+ }
+ $i++;
+ }
+ return $feed;
+ }
+}
+
+?>
属性に変更があったパス: Keires_Feed/trunk/src/Keires/FeedParser.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native
Keires_Feed/trunk/src/Keires/Feed.php
@@ -0,0 +1,262 @@
+<?php
+/**
+ * Keires_Feed
+ *
+ * Feed Parser like XML_Feed_Parser
+ *
+ * PHP version 5
+ *
+ * LICENSE: This source file is subject to version 3.0 of the PHP license
+ * that is available through the world-wide-web at the following URI:
+ * http://www.php.net/license/3_0.txt. If you did not receive a copy of
+ * the PHP License and are unable to obtain it through the web, please
+ * send a note to license@php.net so we can mail you a copy immediately.
+ *
+ * @category XML
+ * @package Keires_Feed
+ * @author KOYAMA Tetsuji <koyama@hoge.hoge.org>
+ * @copyright 2008-2009 KOYAMA Tetsuji
+ * @license http://www.php.net/license/3_0.txt PHP License 3.0
+ * @version svn: $Id$
+ * @link http://openpear.org/package/Keires_Feed
+ * @since File available since Release 0.1
+ */
+
+require_once 'Keires/FeedParser.php';
+require_once 'Keires/ExceptionWrapper.php';
+require_once 'HTTP/Request.php'; // PEAR::HTTP_Request
+
+/*
+ * Error codes
+ */
+define('KEIRES_ERROR_INVALID_PARAM', 1);
+define('KEIRES_ERROR_OPEN_FILE', 2);
+define('KEIRES_ERROR_EMPTY_CONTENTS', 3);
+define('KEIRES_ERROR_UNPARSED', 4);
+define('KEIRES_ERROR_INVALID_XML', 5);
+define('KEIRES_ERROR_DOM_LOADING', 6);
+
+/*
+ * Utility function
+ */
+if (!function_exists('array_val')) {
+ function array_val(&$data, $key, $default = null) {
+ if (!is_array($data)) {
+ return $default;
+ }
+ return isset($data[$key])? $data[$key]: $default;
+ }
+}
+
+/*
+ * Excepiton
+ */
+class Keires_Exception extends Exception {
+ // empty
+}
+
+class Keires_Feed {
+ protected $contents = null;
+ protected $parser = null;
+
+ public function __construct($url, $options = null) {
+ $noreq = array_val($options, 'noreq', false);
+ $ua = array_val($options, 'user_agent');
+
+ if ($noreq) {
+ return;
+ }
+
+ if (empty($url)) {
+ throw new Keires_Exception('url is required',
+ KEIRES_ERROR_INVALID_PARAM);
+ }
+
+ $http_opts = array(
+ 'allowRedirects' => true,
+ );
+ $req = new HTTP_Request($url, $http_opts);
+ if (!empty($ua)) {
+ $req->addHeader('User-Agent', $ua);
+ }
+ $result = $req->sendRequest();
+ if (PEAR::isError($result)) {
+ Keires_ExceptionWrapper::handleError($result);
+ }
+
+ $code = $req->getResponseCode();
+ if ($code != 200) {
+ throw new Keires_Exception('path open failed: '. $url,
+ KEIRES_ERROR_OPEN_FILE);
+ }
+
+ $cont = $req->getResponseBody();
+ if ($cont === false) {
+ throw new Keires_Exception('path open failed: '. $url,
+ KEIRES_ERROR_OPEN_FILE);
+ }
+
+ $this->contents = $cont;
+ }
+
+ public function setContents($contents) {
+ $this->contents = $contents;
+ }
+
+ public function getContents() {
+ return $this->contents;
+ }
+
+ public function parse($options = null) {
+ if (empty($this->contents)) {
+ throw new Keires_Exception('empty contents',
+ KEIRES_ERROR_EMPTY_CONTENTS);
+ }
+ $parser = new Keires_FeedParser($this->contents, $options);
+ $parser->parse();
+
+ $this->parser = $parser;
+ }
+
+ public function getItems() {
+ if (empty($this->parser)) {
+ throw new Keires_Exception('Not parsed yet',
+ KEIRES_ERROR_UNPARSED);
+ }
+ $type = $this->parser->type();
+ $item = $this->parser->items();
+ return new Keires_FeedItem($item, $type);
+ }
+}
+
+class Keires_FeedItem implements Iterator {
+ protected $pos = 0;
+ protected $type = null;
+ protected $items = null;
+
+ public function __construct($items, $type) {
+ if (empty($items) || empty($type)) {
+ throw new Keires_Exception('Empty params',
+ KEIRES_ERROR_INVALID_PARAM);
+ }
+ $this->type = $type;
+ $this->items = $items;
+ }
+
+ static protected function getVal($item, $key) {
+ $v = array_val($item, $key);
+ for (;;) {
+ if (is_array($v)) {
+ $v = $v[0];
+ } else {
+ break;
+ }
+ }
+ return $v;
+ }
+
+ static public function getValKeys($item, $keys) {
+ if (!is_array($keys)) {
+ return null;
+ }
+ foreach ($keys as $key) {
+ $data = self::getVal($item, $key);
+ if (!empty($data)) {
+ return $data;
+ }
+ }
+ return null;
+ }
+
+ static public function parseEntryRSS($item) {
+ $datekeys = array('date', 'pubDate');
+ $pubdate = self::getValKeys($item, $datekeys);
+ $entry = array(
+ 'url' => self::getVal($item, 'link'),
+ 'title' => self::getVal($item, 'title'),
+ 'description' => self::getVal($item, 'description'),
+ 'content' => self::getVal($item, 'encoded'),
+ 'creator' => self::getVal($item, 'creator'),
+ 'pubdate' => $pubdate,
+ );
+ return $entry;
+ }
+
+ static public function parseEntryATOM($item) {
+ // get url
+ $link = $item['link'];
+ $url = null;
+
+ // for feedburner
+ $url = self::getVal($item, 'origLink');
+
+ if (empty($url)) {
+ // first: find <link rel="alternate">
+ foreach ($link['rel'] as $idx => $rel) {
+ if ($rel === 'alternate') {
+ $url = $link['href'][$idx];
+ break;
+ }
+ }
+ }
+ if (empty($url)) {
+ // second: <link type="text/html">
+ foreach ($link['type'] as $idx => $type) {
+ if (($type === 'text/html') ||
+ ($type === 'application/xhtml+xml')){
+ $url = $link['href'][$idx];
+ break;
+ }
+ }
+ }
+ if (empty($url)) {
+ // for lifehacking.jp
+ $url = self::getVal($item, 'id');
+ }
+ // get published date
+ $datekeys = array(
+ 'created',
+ 'published',
+ 'modified',
+ 'issued',
+ );
+ $pubdate = self::getValKeys($item, $datekeys);
+ $entry = array(
+ 'url' => $url,
+ 'title' => self::getVal($item, 'title'),
+ 'description' => self::getVal($item, 'summary'),
+ 'creator' => self::getVal($item, 'author'),
+ 'pubdate' => $pubdate,
+ );
+ return $entry;
+ }
+
+ public function rewind() {
+ $this->pos = 0;
+ }
+
+ public function current() {
+ $item = $this->items[$this->pos];
+ $entry = null;
+ if (strncmp($this->type, 'rss', 3) === 0) {
+ $entry = self::parseEntryRSS($item);
+ } else if (strncmp($this->type, 'atom', 4) === 0) {
+ $entry = self::parseEntryATOM($item);
+ }
+ return $entry;
+ }
+
+ public function key() {
+ return $this->pos;
+ }
+
+ public function next() {
+ ++$this->pos;
+ }
+
+ public function valid() {
+ return ($this->pos < count($this->items));
+ }
+}
+
+?>
属性に変更があったパス: Keires_Feed/trunk/src/Keires/Feed.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native
Keires_Feed/trunk/examples/feedparser.php
@@ -0,0 +1,27 @@
+<?php
+// $Id$
+
+require_once 'Keires/FeedParser.php';
+
+function usage() {
+ echo "Usage: program feed_url\n";
+}
+
+try {
+ if ($argc != 2) {
+ usage();
+ exit;
+ }
+
+ $url = $argv[1];
+ $xml = file_get_contents($url);
+
+ $parser = new Keires_FeedParser($xml);
+ $parser->parse();
+ var_dump($parser->getFeed());
+
+} catch (Exception $e) {
+ die($e->getMessage());
+}
+
+?>
\ No newline at end of file
属性に変更があったパス: Keires_Feed/trunk/examples/feedparser.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native
Keires_Feed/trunk/examples/feed.php
@@ -0,0 +1,39 @@
+<?php
+// $Id$
+
+require_once 'Keires/Feed.php';
+
+function usage() {
+ echo "Usage: program feed_url\n";
+}
+
+try {
+ if ($argc != 2) {
+ usage();
+ exit;
+ }
+
+ $url = $argv[1];
+ $xml = file_get_contents($url);
+
+ $opt = array(
+ 'noreq' => true,
+ );
+ $feed = new Keires_Feed(null, $opt);
+ $feed->setContents($xml);
+
+ $feed->parse();
+
+ $items = $feed->getItems();
+
+ foreach ($items as $item) {
+ var_dump($item);
+ }
+
+} catch (Exception $e) {
+ die($e->getMessage());
+ }
+
+
+
+?>
\ No newline at end of file
属性に変更があったパス: Keires_Feed/trunk/examples/feed.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native