powered by nequal
Home » Keires_Feed » Timeline » 395

Diffs

Keires_Feed/trunk/src/Keires/ExceptionWrapper.php

@@ -0,0 +1,60 @@
+<?php
+/**
+  * Keires_Feed
+  *
+  * Feed Parser like XML_Feed_Parser
+  *
+  * PHP version 5
+  *
+  * LICENSE: This source file is subject to version 3.0 of the PHP license
+  * that is available through the world-wide-web at the following URI:
+  * http://www.php.net/license/3_0.txt.  If you did not receive a copy of
+  * the PHP License and are unable to obtain it through the web, please
+  * send a note to license@php.net so we can mail you a copy immediately.
+  *
+  * @category   XML
+  * @package    Keires_Feed
+  * @author     KOYAMA Tetsuji <koyama@hoge.hoge.org>
+  * @copyright  2008-2009 KOYAMA Tetsuji
+  * @license    http://www.php.net/license/3_0.txt  PHP License 3.0
+  * @version    svn: $Id$
+  * @link       http://openpear.org/package/Keires_Feed
+  * @since      File available since Release 0.1
+  */
+
+require_once 'PEAR.php';
+require_once 'PEAR/Exception.php';
+
+/**
+ * Turns PEAR callback-style errors into thrown PEAR_Exception objects.
+ */
+class Keires_ExceptionWrapper {
+
+    /**
+     * Registers handleError() through the static PEAR::setErrorHandling()
+     * call. A function-static flag makes every call after the first
+     * successful one a no-op.
+     *
+     * @return void
+     */
+    static public function init() {
+        static $registered = false;
+        if ($registered) {
+            return;
+        }
+        $callback = array('Keires_ExceptionWrapper', 'handleError');
+        PEAR::setErrorHandling(PEAR_ERROR_CALLBACK, $callback);
+        $registered = true;
+    }
+
+    /**
+     * Registers handleError() as the error callback of a single object.
+     * Anything that is not a PEAR instance is left untouched.
+     *
+     * @param mixed $obj candidate object
+     * @return void
+     */
+    static public function wrap($obj) {
+        if (!($obj instanceof PEAR)) {
+            return;
+        }
+        $callback = array('Keires_ExceptionWrapper', 'handleError');
+        $obj->setErrorHandling(PEAR_ERROR_CALLBACK, $callback);
+    }
+
+    /**
+     * Error callback: throws the exception built by factory().
+     *
+     * @param object $err error object providing getMessage() and getCode()
+     * @throws PEAR_Exception always
+     */
+    static public function handleError($err) {
+        $exception = self::factory($err);
+        throw $exception;
+    }
+
+    /**
+     * Builds the exception that represents the given error.
+     *
+     * @param object $err error object providing getMessage() and getCode()
+     * @return PEAR_Exception exception carrying the error's message and code
+     */
+    static public function factory($err) {
+        // The class name is kept in a variable; PEAR_Exception is the
+        // only class currently used.
+        $exception_class = 'PEAR_Exception';
+        $message = $err->getMessage();
+        $code = $err->getCode();
+
+        return new $exception_class($message, $code);
+    }
+}
+
+?>
\ No newline at end of file
属性に変更があったパス: Keires_Feed/trunk/src/Keires/ExceptionWrapper.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native

Keires_Feed/trunk/src/Keires/FeedParser.php

@@ -0,0 +1,239 @@
+<?php
+/**
+  * Keires_Feed
+  *
+  * Feed Parser like XML_Feed_Parser
+  *
+  * PHP version 5
+  *
+  * LICENSE: This source file is subject to version 3.0 of the PHP license
+  * that is available through the world-wide-web at the following URI:
+  * http://www.php.net/license/3_0.txt.  If you did not receive a copy of
+  * the PHP License and are unable to obtain it through the web, please
+  * send a note to license@php.net so we can mail you a copy immediately.
+  *
+  * @category   XML
+  * @package    Keires_Feed
+  * @author     KOYAMA Tetsuji <koyama@hoge.hoge.org>
+  * @copyright  2008-2009 KOYAMA Tetsuji
+  * @license    http://www.php.net/license/3_0.txt  PHP License 3.0
+  * @version    svn: $Id$
+  * @link       http://openpear.org/package/Keires_Feed
+  * @since      File available since Release 0.1
+  *
+  * based on Rasmus Lerdorf's simple_rss.php
+  *  ( http://lerdorf.com/php/simple_rss.phps )
+  */
+
+require_once 'Keires/Feed.php';
+
+/**
+ * Parses an RSS or Atom document held in a string into a nested array.
+ *
+ * Typical use: construct with the raw XML (the constructor reads the XML
+ * declaration), call parse(), then read the result through getFeed(),
+ * type() or items().
+ *
+ * The parsing logic is based on Rasmus Lerdorf's simple_rss.php.
+ */
+class Keires_FeedParser {
+
+    // XML version taken from the XML declaration
+    protected $xmlversion = '1.0';
+    // encoding taken from the XML declaration, null when it names none
+    protected $encoding = null;
+    // raw XML text given to the constructor
+    protected $xml = null;
+    // array produced by parseXML(), null until parse() has run
+    protected $feed = null;
+
+    /**
+     * @param string $xml     raw feed document
+     * @param mixed  $options accepted for interface compatibility; not read
+     * @throws Keires_Exception KEIRES_ERROR_INVALID_XML when the first line
+     *                          carries no XML declaration
+     */
+    public function __construct($xml, $options = null) {
+        $this->xml = $xml;
+        $this->analyze();
+    }
+
+    /**
+     * @return array|null the parsed feed, or null before parse() was called
+     */
+    public function getFeed() {
+        return $this->feed;
+    }
+
+    /**
+     * Returns the text up to (not including) the first CR or LF. When the
+     * text has no line break at all, only its first 256 bytes are returned.
+     *
+     * @param string $text text to inspect (by reference; it is not modified)
+     * @return string
+     */
+    static protected function firstLine(&$text) {
+        $p1 = strpos($text, "\r");
+        $p2 = strpos($text, "\n");
+        $p = FALSE;
+
+        if ($p1 === FALSE) {
+            $p = $p2;
+        } else {
+            if ($p2 === FALSE) {
+                $p = $p1;
+            } else {
+                $p = min($p1, $p2);
+            }
+        }
+        if ($p === FALSE) {
+            // no or long length line
+            return substr($text, 0, 256);
+        }
+        return substr($text, 0, $p);
+    }
+
+    /**
+     * Reads version and encoding from the XML declaration on the first line.
+     *
+     * @throws Keires_Exception KEIRES_ERROR_INVALID_XML when no declaration
+     *                          is found
+     */
+    protected function analyze() {
+        $xml = $this->xml;
+
+        $first_line = self::firstLine($xml);
+        // "<?" is escaped so the literal "<?xml" is required, the quoted
+        // values cannot run past their closing quote, the encoding
+        // pseudo-attribute is optional, and anything that follows it
+        // (e.g. standalone="yes") is skipped up to the closing "?>".
+        $nmatch = preg_match('!<\?xml\s+version\s*=\s*[\'"]([^\'"]+)[\'"](?:\s+encoding\s*=\s*[\'"]([^\'"]+)[\'"])?[^?>]*\?>!i',
+                             $first_line, $matches);
+        if ($nmatch != 1) {
+            throw new Keires_Exception('Invalid XML document',
+                                       KEIRES_ERROR_INVALID_XML);
+        }
+
+        $this->xmlversion = $matches[1];
+        // preg_match() drops a trailing group that did not take part in
+        // the match, so the encoding slot may be missing.
+        $this->encoding = isset($matches[2]) ? $matches[2] : null;
+    }
+
+    /**
+     * Parses the stored XML; afterwards getFeed(), type() and items()
+     * return data.
+     *
+     * @throws Keires_Exception see parseXML()
+     */
+    public function parse() {
+        $options = array(
+            'version' => $this->xmlversion,
+            'encoding' => $this->encoding,
+            );
+        $feed = self::parseXML($this->xml, $options);
+        $this->feed = $feed;
+    }
+
+    /**
+     * @return string|null value of the '_type' key set by parseXML()
+     *                     (e.g. 'rss2.0', 'atom1.0'), or null when the
+     *                     parser could not classify the feed
+     * @throws Keires_Exception KEIRES_ERROR_UNPARSED before parse()
+     */
+    public function type() {
+        if (empty($this->feed)) {
+            throw new Keires_Exception('feed is not parsed yet',
+                                       KEIRES_ERROR_UNPARSED);
+        }
+        return isset($this->feed['_type']) ? $this->feed['_type'] : null;
+    }
+
+    /**
+     * @return array raw item arrays; empty when the feed has no items
+     * @throws Keires_Exception KEIRES_ERROR_UNPARSED before parse()
+     */
+    public function items() {
+        if (empty($this->feed)) {
+            throw new Keires_Exception('feed not parsed yet',
+                                       KEIRES_ERROR_UNPARSED);
+        }
+        return isset($this->feed['item']) ? $this->feed['item'] : array();
+    }
+
+    /**
+     * Classifies a document by the name of its first recognised top-level
+     * node: "rss" or a name starting with "rdf" gives 'rss', "feed" gives
+     * 'atom'.
+     *
+     * @param DOMNode $dom node whose direct children are inspected
+     * @return string|null 'rss', 'atom', or null when nothing matched
+     */
+    static public function feedType($dom) {
+        $type = null;
+        foreach ($dom->childNodes as $child) {
+            $node = $child->nodeName;
+            switch ($node) {
+            case 'rss':
+                $type = 'rss';
+                break;
+            case 'feed':
+                $type = 'atom';
+                break;
+            default:
+                if (strncmp($node, 'rdf', 3) === 0) {
+                    $type = 'rss';
+                }
+            }
+            if (!empty($type)) {
+                break;
+            }
+        }
+        return $type;
+    }
+
+    /**
+     * Converts a feed document into a nested array.
+     *
+     * The result holds the root element's attributes, the channel/feed
+     * level elements, a '_type' key ('rss0.91', 'rss1.0', 'rss2.0',
+     * 'atom0.3', 'atom1.0', or 'rss' followed by the root version
+     * attribute for other RSS documents) and, when present, an 'item'
+     * list with one array per item/entry.
+     *
+     * @param string $input_xml raw XML text
+     * @param array  $options   'version' (default '1.0') and 'encoding'
+     *                          (default 'UTF-8') for the DOMDocument
+     * @return array
+     * @throws Keires_Exception KEIRES_ERROR_DOM_LOADING when the XML cannot
+     *                          be loaded, KEIRES_ERROR_INVALID_XML when it
+     *                          cannot be handed over to SimpleXML
+     */
+    static public function parseXML($input_xml, $options) {
+        // isset() is false for missing keys, null values and a non-array
+        // $options alike, so the defaults cover all three cases.
+        $version = isset($options['version']) ? $options['version'] : '1.0';
+        $encoding = isset($options['encoding']) ? $options['encoding'] : 'UTF-8';
+
+        $dom = new DOMDocument($version, $encoding);
+
+        $result = $dom->loadXML($input_xml);
+        if ($result === FALSE) {
+            throw new Keires_Exception('DOMDocument load error',
+                                       KEIRES_ERROR_DOM_LOADING);
+        }
+        $feedtype = self::feedType($dom);
+
+        // Pick out the namespaces that apply to this doc.
+        // We need to do this from DOM because simplexml doesn't see the
+        // special xmlns attributes because of the way libxml2 handles them.
+        $xpath = new DOMXPath($dom);
+        $ns = array(''=>NULL);
+        foreach($xpath->query("namespace::*") as $v) {
+            if($v->localName!='xml') $ns[$v->localName] = $v->nodeValue;
+        }
+        $atom10 = $atom03 = false;
+        if ($feedtype != 'rss') {
+            if (in_array('http://www.w3.org/2005/Atom', $ns)) {
+                $atom10 = true;
+                unset($ns['xmlns']);
+            } else if (in_array('http://purl.org/atom/ns#', $ns)) {
+                $atom03 = true;
+                unset($ns['xmlns']);
+            }
+        }
+
+        // Ok, now we can switch to simplexml
+        $xml = simplexml_import_dom($dom);
+        // Failure has been reported as false as well as null, so test for
+        // the only acceptable result instead of one specific error value.
+        if (!($xml instanceof SimpleXMLElement)) {
+            throw new Keires_Exception('XML format error',
+                                       KEIRES_ERROR_INVALID_XML);
+        }
+        $feed = array();
+
+        // Pull out the root attributes - usually just version
+        foreach($xml->attributes() as $k=>$v) $feed[$k] = (string)$v;
+
+        // We will deal with the items separately, so start by only looking
+        // at the stuff leading up to the items checking each namespace.
+        $rss1 = false;
+        if ($atom10 || $atom03) {
+            $top = $xml;
+        } else {
+            $top = $xml->channel;
+        }
+        foreach($ns as $alias=>$uri) foreach($top->children($uri) as $key=>$val) {
+            if($key=="item" || $key=="entry") continue;
+            if($key=="items") {
+                $rss1 = true; continue;
+            }
+            if(!$val->children()) {
+                $feed[$key][0] = (string)$val;
+                foreach($ns as $a=>$u) foreach($val->attributes($u) as $at=>$atv) {
+                    $feed[$key][$at] = (string)$atv;
+                }
+            } else {
+                foreach($val->children() as $k=>$v) {
+                    $feed[$key][$k] = (string)$v;
+                    foreach($v->attributes() as $at=>$atv) {
+                        $feed[$k][$at] = (string)$atv;
+                    }
+                }
+            }
+        }
+
+        // Now we deal with the items
+        // Atom and RSS1 have the feed items a level higher than RSS2
+        $i = 0;
+        if($rss1) { $feed['_type']='rss1.0'; $items = $xml->item; }
+        else if($atom10) { $feed['_type']='atom1.0'; $items = $xml->entry; }
+        else if($atom03) { $feed['_type']='atom0.3'; $items = $xml->entry; }
+        else {
+            // The root element may carry no version attribute, so read it
+            // defensively; RSS documents with any other version still get
+            // a '_type' starting with "rss".
+            $rssver = isset($feed['version']) ? $feed['version'] : '';
+            if($rssver=='2.0') $feed['_type']='rss2.0';
+            else if($rssver=='0.91') $feed['_type']='rss0.91';
+            else if($feedtype==='rss') $feed['_type']='rss'.$rssver;
+            $items = $xml->channel->item;
+        }
+        foreach($items as $key=>$val) {
+            foreach($ns as $a=>$u) foreach($val->attributes($u) as $at=>$atv) {
+                $feed['item'][$i][$at] = (string)$atv;
+            }
+            foreach($ns as $alias=>$uri) {
+                foreach($val->children($uri) as $k=>$v) {
+                    $feed['item'][$i][$k][0] = (string)$v;
+                    foreach($v->attributes() as $at=>$atv) {
+                        $at_val = (string)$atv;
+                        if ($atom10 || $atom03) {
+                            // Atom elements may repeat (e.g. several
+                            // <link>), so attribute values are collected
+                            // as lists.
+                            $feed['item'][$i][$k][$at][] = $at_val;
+                            // Don't even try parsing this stuff, just pass it through.
+                            if($at_val=='xhtml' || $at_val=='html' || $at_val=='text') {
+                                $tags = $v->children();
+                                $feed['item'][$i][$k]['text'] = $tags->asXML();
+                            }
+                        } else $feed['item'][$i][$k][$at] = $at_val;
+
+                    }
+                }
+            }
+            $i++;
+        }
+        return $feed;
+    }
+}
+
+?>
属性に変更があったパス: Keires_Feed/trunk/src/Keires/FeedParser.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native

Keires_Feed/trunk/src/Keires/Feed.php

@@ -0,0 +1,262 @@
+<?php
+/**
+  * Keires_Feed
+  *
+  * Feed Parser like XML_Feed_Parser
+  *
+  * PHP version 5
+  *
+  * LICENSE: This source file is subject to version 3.0 of the PHP license
+  * that is available through the world-wide-web at the following URI:
+  * http://www.php.net/license/3_0.txt.  If you did not receive a copy of
+  * the PHP License and are unable to obtain it through the web, please
+  * send a note to license@php.net so we can mail you a copy immediately.
+  *
+  * @category   XML
+  * @package    Keires_Feed
+  * @author     KOYAMA Tetsuji <koyama@hoge.hoge.org>
+  * @copyright  2008-2009 KOYAMA Tetsuji
+  * @license    http://www.php.net/license/3_0.txt  PHP License 3.0
+  * @version    svn: $Id$
+  * @link       http://openpear.org/package/Keires_Feed
+  * @since      File available since Release 0.1
+  */
+
+require_once 'Keires/FeedParser.php';
+require_once 'Keires/ExceptionWrapper.php';
+require_once 'HTTP/Request.php';  // PEAR::HTTP_Request
+
+/*
+ * Error codes
+ *
+ * Passed as the code argument of the Keires_Exception thrown by the
+ * Keires_* classes.
+ */
+define('KEIRES_ERROR_INVALID_PARAM',   1);  // a required argument is missing or empty
+define('KEIRES_ERROR_OPEN_FILE',       2);  // the feed URL could not be fetched
+define('KEIRES_ERROR_EMPTY_CONTENTS',  3);  // parse() was called with no contents set
+define('KEIRES_ERROR_UNPARSED',        4);  // results were requested before parsing
+define('KEIRES_ERROR_INVALID_XML',     5);  // XML declaration missing or SimpleXML import failed
+define('KEIRES_ERROR_DOM_LOADING',     6);  // DOMDocument::loadXML() failed
+
+/*
+ * Utility function
+ */
+if (!function_exists('array_val')) {
+    /**
+     * Looks up $key in $data and falls back to $default.
+     *
+     * The fallback is used when $data is not an array, when the key is
+     * absent, and when the stored value is null (the lookup uses isset()).
+     *
+     * @param mixed      $data    candidate array (by reference; not modified)
+     * @param int|string $key     key to look up
+     * @param mixed      $default value returned when the lookup fails
+     * @return mixed
+     */
+    function array_val(&$data, $key, $default = null) {
+        if (is_array($data) && isset($data[$key])) {
+            return $data[$key];
+        }
+        return $default;
+    }
+}
+
+/*
+ * Exception
+ *
+ * Thrown by the Keires_* classes together with one of the KEIRES_ERROR_*
+ * codes.
+ */
+class Keires_Exception extends Exception {
+    // intentionally empty: adds nothing to Exception
+}
+
+/**
+ * Holds the raw text of a feed, optionally fetched over HTTP, and exposes
+ * its parsed items.
+ */
+class Keires_Feed {
+    // raw feed text, null until fetched or set
+    protected $contents = null;
+    // Keires_FeedParser of the last successful parse(), null before
+    protected $parser = null;
+
+    /**
+     * Fetches the feed at $url unless the 'noreq' option is set.
+     *
+     * @param string $url     feed location
+     * @param array  $options 'noreq': when truthy nothing is fetched and
+     *                        $url is ignored; 'user_agent': value sent as
+     *                        the User-Agent header
+     * @throws Keires_Exception KEIRES_ERROR_INVALID_PARAM for an empty
+     *                          $url, KEIRES_ERROR_OPEN_FILE when the
+     *                          response code is not 200 or the body is
+     *                          false
+     */
+    public function __construct($url, $options = null) {
+        $skip_request = array_val($options, 'noreq', false);
+        $agent = array_val($options, 'user_agent');
+
+        if ($skip_request) {
+            return;
+        }
+        if (empty($url)) {
+            throw new Keires_Exception('url is required',
+                                       KEIRES_ERROR_INVALID_PARAM);
+        }
+
+        $request = new HTTP_Request($url, array('allowRedirects' => true));
+        if (!empty($agent)) {
+            $request->addHeader('User-Agent', $agent);
+        }
+
+        $sent = $request->sendRequest();
+        if (PEAR::isError($sent)) {
+            Keires_ExceptionWrapper::handleError($sent);
+        }
+
+        // The body is only read for a 200 response; every other outcome
+        // is reported as the same open failure.
+        $body = false;
+        if ($request->getResponseCode() == 200) {
+            $body = $request->getResponseBody();
+        }
+        if ($body === false) {
+            throw new Keires_Exception('path open failed: '. $url,
+                                       KEIRES_ERROR_OPEN_FILE);
+        }
+
+        $this->contents = $body;
+    }
+
+    /**
+     * Replaces the stored feed text.
+     *
+     * @param string $contents raw feed text
+     */
+    public function setContents($contents) {
+        $this->contents = $contents;
+    }
+
+    /**
+     * @return string|null the stored feed text
+     */
+    public function getContents() {
+        return $this->contents;
+    }
+
+    /**
+     * Parses the stored text with Keires_FeedParser.
+     *
+     * @param mixed $options handed to the Keires_FeedParser constructor
+     * @throws Keires_Exception KEIRES_ERROR_EMPTY_CONTENTS when nothing is
+     *                          stored
+     */
+    public function parse($options = null) {
+        if (empty($this->contents)) {
+            throw new Keires_Exception('empty contents',
+                                       KEIRES_ERROR_EMPTY_CONTENTS);
+        }
+
+        // The parser is only kept once parsing went through.
+        $candidate = new Keires_FeedParser($this->contents, $options);
+        $candidate->parse();
+        $this->parser = $candidate;
+    }
+
+    /**
+     * @return Keires_FeedItem iterator over the parsed items
+     * @throws Keires_Exception KEIRES_ERROR_UNPARSED before parse()
+     */
+    public function getItems() {
+        if (empty($this->parser)) {
+            throw new Keires_Exception('Not parsed yet',
+                                       KEIRES_ERROR_UNPARSED);
+        }
+
+        $feed_type = $this->parser->type();
+        $raw_items = $this->parser->items();
+
+        return new Keires_FeedItem($raw_items, $feed_type);
+    }
+}
+
+/**
+ * Iterator over the raw item arrays of a parsed feed. Each step yields a
+ * normalised entry array built by parseEntryRSS() or parseEntryATOM(),
+ * chosen by the feed type.
+ */
+class Keires_FeedItem implements Iterator {
+    // current position in $items
+    protected $pos = 0;
+    // feed type string; its "rss"/"atom" prefix selects the entry parser
+    protected $type = null;
+    // raw item arrays
+    protected $items = null;
+
+    /**
+     * @param array  $items raw item arrays
+     * @param string $type  feed type, e.g. 'rss2.0' or 'atom1.0'
+     * @throws Keires_Exception KEIRES_ERROR_INVALID_PARAM when either
+     *                          argument is empty
+     */
+    public function __construct($items, $type) {
+        if (empty($items) || empty($type)) {
+            throw new Keires_Exception('Empty params',
+                                       KEIRES_ERROR_INVALID_PARAM);
+        }
+        $this->type = $type;
+        $this->items = $items;
+    }
+
+    /**
+     * Returns $item[$key], following element 0 for as long as the value is
+     * an array.
+     *
+     * @param array  $item raw item array
+     * @param string $key  element name
+     * @return mixed|null the first non-array value found; null when the
+     *                    key, or an element 0 along the way, is missing
+     */
+    static protected function getVal($item, $key) {
+        $v = (is_array($item) && isset($item[$key])) ? $item[$key] : null;
+        while (is_array($v)) {
+            // An array without element 0 ends the search with null rather
+            // than reading an undefined offset.
+            $v = isset($v[0]) ? $v[0] : null;
+        }
+        return $v;
+    }
+
+    /**
+     * Returns the value of the first key in $keys that yields a non-empty
+     * value.
+     *
+     * @param array $item raw item array
+     * @param array $keys element names in order of preference
+     * @return mixed|null null when $keys is not an array or nothing matched
+     */
+    static public function getValKeys($item, $keys) {
+        if (!is_array($keys)) {
+            return null;
+        }
+        foreach ($keys as $key) {
+            $data = self::getVal($item, $key);
+            if (!empty($data)) {
+                return $data;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Builds an entry array from a raw RSS item.
+     *
+     * @param array $item raw item array
+     * @return array keys: url, title, description, content, creator, pubdate
+     */
+    static public function parseEntryRSS($item) {
+        $datekeys = array('date', 'pubDate');
+        $pubdate = self::getValKeys($item, $datekeys);
+        $entry = array(
+            'url'         => self::getVal($item, 'link'),
+            'title'       => self::getVal($item, 'title'),
+            'description' => self::getVal($item, 'description'),
+            'content'     => self::getVal($item, 'encoded'),
+            'creator'     => self::getVal($item, 'creator'),
+            'pubdate'     => $pubdate,
+            );
+        return $entry;
+    }
+
+    /**
+     * Builds an entry array from a raw Atom entry.
+     *
+     * The URL is taken from the first source that yields a value: the
+     * origLink element, the link with rel="alternate", the link whose type
+     * is text/html or application/xhtml+xml, and finally the id element.
+     *
+     * @param array $item raw entry array
+     * @return array keys: url, title, description, creator, pubdate
+     */
+    static public function parseEntryATOM($item) {
+        // An entry may carry no link element, and a link may lack rel,
+        // type or href, so each level is checked before it is read.
+        $link = (is_array($item) && isset($item['link'])
+                 && is_array($item['link']))? $item['link']: array();
+
+        // for feedburner
+        $url = self::getVal($item, 'origLink');
+
+        if (empty($url) && isset($link['rel']) && is_array($link['rel'])) {
+            // first: find <link rel="alternate">
+            foreach ($link['rel'] as $idx => $rel) {
+                if ($rel === 'alternate') {
+                    $url = isset($link['href'][$idx])?
+                        $link['href'][$idx]: null;
+                    break;
+                }
+            }
+        }
+        if (empty($url) && isset($link['type']) && is_array($link['type'])) {
+            // second: <link type="text/html">
+            foreach ($link['type'] as $idx => $type) {
+                if (($type === 'text/html') ||
+                    ($type === 'application/xhtml+xml')){
+                    $url = isset($link['href'][$idx])?
+                        $link['href'][$idx]: null;
+                    break;
+                }
+            }
+        }
+        if (empty($url)) {
+            // for lifehacking.jp
+            $url = self::getVal($item, 'id');
+        }
+        // get published date
+        $datekeys = array(
+            'created',
+            'published',
+            'modified',
+            'issued',
+            );
+        $pubdate = self::getValKeys($item, $datekeys);
+        $entry = array(
+            'url'         => $url,
+            'title'       => self::getVal($item, 'title'),
+            'description' => self::getVal($item, 'summary'),
+            'creator'     => self::getVal($item, 'author'),
+            'pubdate'     => $pubdate,
+            );
+        return $entry;
+    }
+
+    // The #[\ReturnTypeWillChange] lines below silence the PHP 8.1+
+    // deprecation notice for Iterator methods without return types; older
+    // PHP versions read them as ordinary "#" comments.
+
+    /**
+     * Moves back to the first item.
+     */
+    #[\ReturnTypeWillChange]
+    public function rewind() {
+        $this->pos = 0;
+    }
+
+    /**
+     * @return array|null entry for the current position; null when there
+     *                    is no item at that position or the feed type is
+     *                    neither rss* nor atom*
+     */
+    #[\ReturnTypeWillChange]
+    public function current() {
+        if (!isset($this->items[$this->pos])) {
+            return null;
+        }
+        $item = $this->items[$this->pos];
+        $entry = null;
+        if (strncmp($this->type, 'rss', 3) === 0) {
+            $entry = self::parseEntryRSS($item);
+        } else if (strncmp($this->type, 'atom', 4) === 0) {
+            $entry = self::parseEntryATOM($item);
+        }
+        return $entry;
+    }
+
+    /**
+     * @return int current position
+     */
+    #[\ReturnTypeWillChange]
+    public function key() {
+        return $this->pos;
+    }
+
+    /**
+     * Advances to the next position.
+     */
+    #[\ReturnTypeWillChange]
+    public function next() {
+        ++$this->pos;
+    }
+
+    /**
+     * @return bool true while the position is below the item count
+     */
+    #[\ReturnTypeWillChange]
+    public function valid() {
+        return ($this->pos < count($this->items));
+    }
+}
+
+?>
属性に変更があったパス: Keires_Feed/trunk/src/Keires/Feed.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native

Keires_Feed/trunk/examples/feedparser.php

@@ -0,0 +1,27 @@
+<?php
+// $Id$
+
+require_once 'Keires/FeedParser.php';
+
+/**
+ * Prints the command-line synopsis.
+ */
+function usage() {
+    echo "Usage: program feed_url\n";
+}
+
+// Reads the feed named on the command line, parses it with
+// Keires_FeedParser and dumps the resulting array.
+try {
+    if ($argc != 2) {
+        usage();
+        // wrong invocation is a failure, so exit non-zero
+        exit(1);
+    }
+
+    $url = $argv[1];
+    $xml = file_get_contents($url);
+    if ($xml === false) {
+        // Without this check the parser would be handed false and
+        // report a misleading "Invalid XML document".
+        throw new Exception('failed to read: ' . $url);
+    }
+
+    $parser = new Keires_FeedParser($xml);
+    $parser->parse();
+    var_dump($parser->getFeed());
+
+} catch (Exception $e) {
+    // report the message on its own line and signal failure to the caller
+    echo $e->getMessage(), "\n";
+    exit(1);
+}
+
+?>
\ No newline at end of file
属性に変更があったパス: Keires_Feed/trunk/examples/feedparser.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native

Keires_Feed/trunk/examples/feed.php

@@ -0,0 +1,39 @@
+<?php
+// $Id$
+
+require_once 'Keires/Feed.php';
+
+/**
+ * Prints the command-line synopsis.
+ */
+function usage() {
+    echo "Usage: program feed_url\n";
+}
+
+// Reads the feed named on the command line, hands the text to Keires_Feed
+// without letting it issue an HTTP request of its own, and dumps every
+// parsed entry.
+try {
+    if ($argc != 2) {
+        usage();
+        // wrong invocation is a failure, so exit non-zero
+        exit(1);
+    }
+
+    $url = $argv[1];
+    $xml = file_get_contents($url);
+    if ($xml === false) {
+        // Without this check the failure would surface later as a
+        // misleading "empty contents" error.
+        throw new Exception('failed to read: ' . $url);
+    }
+
+    // 'noreq' makes the constructor skip fetching; the contents are
+    // supplied through setContents() instead.
+    $opt = array(
+        'noreq' => true,
+        );
+    $feed = new Keires_Feed(null, $opt);
+    $feed->setContents($xml);
+
+    $feed->parse();
+
+    $items = $feed->getItems();
+
+    foreach ($items as $item) {
+        var_dump($item);
+    }
+
+} catch (Exception $e) {
+    // report the message on its own line and signal failure to the caller
+    echo $e->getMessage(), "\n";
+    exit(1);
+}
+
+
+
+?>
\ No newline at end of file
属性に変更があったパス: Keires_Feed/trunk/examples/feed.php
___________________________________________________________________
追加: svn:keywords
+ Id
追加: svn:eol-style
+ native