powered by nequal
Home » File_HyperEstraier » Timeline » 178

Diffs

File_HyperEstraier/trunk/HyperEstraier/Draft.php

@@ -0,0 +1,179 @@
+<?php
+/**
+ * @package File_HyperEstraier
+ */
+/**
+ * HyperEstraier document draft parser.
+ *
+ * A draft is a block of header lines ("name=value" attributes and
+ * "%"-prefixed directives), an empty line, and then the text lines.
+ * Text lines that start with a tab are kept as hidden texts.
+ */
+class File_HyperEstraier_Draft {
+	private $attrs = array();
+	private $kwords = array();
+	private $score = null;
+	private $shadow = array();
+
+	protected $texts = array();
+	private $hiddens = array();
+
+	/**
+	 * Load draft data.
+	 *
+	 * The object is only updated when the whole string parses; on failure
+	 * an E_USER_NOTICE is raised and the previously loaded data is kept.
+	 *
+	 * @param string $string Document draft string.
+	 * @return boolean True on success, false on an unknown or malformed header.
+	 */
+	public function load($string){
+		$attrs = array();
+		$kwords = array();
+		$score = null;
+		$shadow = array();
+
+		$hiddens = array();
+		$texts = array();
+
+		// Detect EOL. strpos() returns 0 for a match at the very start of
+		// the string, so the result must be compared against false.
+		$eol = "\n";
+		if(strpos($string, "\r\n") !== false){
+			$eol = "\r\n";
+		}elseif(strpos($string, "\r") !== false){
+			$eol = "\r";
+		}
+
+		$is_header = true;
+		foreach(explode($eol, $string) as $line){
+			if($line === ''){
+				// The first empty line ends the header; empty lines are never stored.
+				$is_header = false;
+				continue;
+			}
+			if(!$is_header){
+				if($line[0] === "\t"){ // hidden text
+					$hiddens[] = substr($line, 1);
+				}else{
+					$texts[] = $line;
+				}
+				continue;
+			}
+			if($line[0] !== '%'){
+				// Attribute line: everything after the first "=" is the value.
+				$kv = explode('=', $line, 2);
+				if(!isset($kv[1])){
+					trigger_error("Parse error in hyperestraier document draft header.", E_USER_NOTICE);
+					return false;
+				}
+				$attrs[$kv[0]] = $kv[1];
+				continue;
+			}
+
+			// Directive line: tab separated fields after the directive name.
+			$kv = explode("\t", $line);
+			if(substr($line, 0, 7) === '%VECTOR'){
+				// Keyword/score pairs. Stepping by two with a bounds check skips
+				// a dangling keyword instead of reading past the end of $kv.
+				for($i = 1; $i + 1 < count($kv); $i += 2){
+					$kwords[$kv[$i]] = $kv[$i + 1];
+				}
+			}elseif(substr($line, 0, 6) === '%SCORE'){
+				if(!isset($kv[1])){
+					trigger_error("Parse error in hyperestraier document draft header.", E_USER_NOTICE);
+					return false;
+				}
+				$score = $kv[1];
+			}elseif(substr($line, 0, 7) === '%SHADOW'){
+				if(!isset($kv[2])){
+					trigger_error("Parse error in hyperestraier document draft header.", E_USER_NOTICE);
+					return false;
+				}
+				$shadow[$kv[1]] = $kv[2];
+			}else{
+				trigger_error("The library does not know the header. ".$line, E_USER_NOTICE);
+				return false;
+			}
+		}
+
+		$this->attrs = $attrs;
+		$this->kwords = $kwords;
+		$this->score = $score;
+		$this->shadow = $shadow;
+
+		$this->hiddens = $hiddens;
+		$this->texts = $texts;
+
+		return true;
+	}
+
+	/**
+	 * Dump draft data.
+	 *
+	 * Not implemented: calling this raises an E_USER_ERROR.
+	 *
+	 * @return string Document draft
+	 */
+	public function dump(){
+		trigger_error("Not implemented yet.", E_USER_ERROR);
+	}
+
+	/**
+	 * Get a list of attribute names.
+	 * @return array List of attribute names.
+	 */
+	public function getAttributeNames(){
+		return array_keys($this->attrs);
+	}
+
+	/**
+	 * Get the value of an attribute.
+	 * @param string $name attribute name.
+	 * @return string|false The value, or false when the attribute is not set.
+	 */
+	public function getAttribute($name){
+		if(isset($this->attrs[$name])){
+			return $this->attrs[$name];
+		}
+		return false;
+	}
+
+	/**
+	 * Set the value of an attribute.
+	 * @param string $name attribute name.
+	 * @param string $value attribute value.
+	 */
+	public function setAttribute($name, $value){
+		$this->attrs[$name]=$value;
+	}
+
+	/**
+	 * Add a text.
+	 * @param string $text The text to add.
+	 * @param boolean $hidden True to store the text as a hidden text.
+	 */
+	public function addText($text, $hidden=false){
+		if($hidden){
+			$this->hiddens[]=$text;
+		}else{
+			$this->texts[]=$text;
+		}
+	}
+
+	/**
+	 * Get document part in one string.
+	 *
+	 * Visible texts come first, followed by the hidden texts, each hidden
+	 * text prefixed with a tab; lines are joined with "\n".
+	 *
+	 * @return string Document part.
+	 */
+	public function getDocument(){
+		$texts = $this->texts;
+		foreach($this->hiddens as $h){
+			$texts[] = "\t".$h;
+		}
+		return join("\n", $texts);
+	}
+}

File_HyperEstraier/trunk/HyperEstraier/Snippet.php

@@ -0,0 +1,27 @@
+<?php
+/**
+ * @package File_HyperEstraier
+ */
+require_once("File/HyperEstraier/Draft.php");
+/**
+ * HyperEstraier snippet parser.
+ */
+class File_HyperEstraier_Snippet extends File_HyperEstraier_Draft {
+	/**
+	 * Get the snippet as one plain string.
+	 *
+	 * Each text line is cut at its first tab and the leading parts are
+	 * concatenated without a separator.
+	 *
+	 * @return string Plain snippet text.
+	 */
+	public function getPlainSnippet(){
+		$rows = array();
+		foreach($this->texts as $doc){
+			// explode() rather than split(): split() was removed in PHP 7.
+			$snp = explode("\t", $doc, 2);
+			$rows[] = $snp[0];
+		}
+		return join("",$rows);
+	}
+}

File_HyperEstraier/trunk/HyperEstraier/SearchResult.php

@@ -0,0 +1,102 @@
+<?php
+/**
+ * @package File_HyperEstraier
+ */
+require_once('File/HyperEstraier/Snippet.php');
+/**
+ * HyperEstraier search result parser.
+ *
+ * This class parses the output of an estmaster search result and iterates
+ * over the snippets found in it.
+ */
+class File_HyperEstraier_SearchResult implements SeekableIterator {
+	private $docs = array();
+	private $hint = array();
+	private $pos = 0;
+
+	/**
+	 * Load search result data.
+	 *
+	 * The first line of the input is used as the part separator. The part
+	 * after the first separator is read as tab separated hint key/value
+	 * lines, every later part is parsed as a snippet, and a part starting
+	 * with ":END" stops the parsing. Snippets are appended to the ones
+	 * loaded by earlier calls.
+	 *
+	 * @param string $string Search result string.
+	 * @return boolean True on success, false when the first line is empty.
+	 */
+	public function load($string){
+		$lines=explode("\n", $string);
+		$separator=$lines[0];
+		// explode() cannot split on an empty delimiter.
+		if($separator === ''){ return false; }
+		$ct=0;
+		foreach(explode($separator, $string) as $str){
+			if(strpos($str,':END')===0){ break; }
+			// Drop the first character (the line break after the separator line).
+			$str=substr($str,1);
+			if($ct==0){
+				// always empty because this is the very beginning of the document part.
+			}elseif($ct==1){
+				// meta part
+				foreach(explode("\n",$str) as $line){
+					if($line === ''){ continue; }
+					$kv=explode("\t",$line,2);
+					// A key without a tab separated value is stored as null.
+					$this->hint[$kv[0]]=isset($kv[1]) ? $kv[1] : null;
+				}
+			}else{
+				// snippet part
+				$snippet = new File_HyperEstraier_Snippet();
+				$snippet->load($str);
+				$this->docs[] = $snippet;
+			}
+			$ct++;
+		}
+		return true;
+	}
+
+	/** Iterator: the snippet at the current position, or false if there is none. */
+	public function current(){
+		// Index by $pos; next() and rewind() only move $pos, not the
+		// array's internal pointer.
+		return isset($this->docs[$this->pos]) ? $this->docs[$this->pos] : false;
+	}
+
+	/** Iterator: the zero based position. */
+	public function key(){
+		return $this->pos;
+	}
+
+	/** Iterator: advance to the next snippet. */
+	public function next(){
+		$this->pos++;
+	}
+
+	/** Iterator: go back to the first snippet. */
+	public function rewind(){
+		$this->pos=0;
+	}
+
+	/** Iterator: true while the position points at a loaded snippet. */
+	public function valid(){
+		return $this->pos < count($this->docs);
+	}
+
+	/**
+	 * SeekableIterator: move to the given position.
+	 *
+	 * The current position is left unchanged when the seek fails.
+	 *
+	 * @param int $index Zero based position.
+	 * @throws OutOfBoundsException When no snippet exists at $index.
+	 */
+	public function seek($index){
+		$index = (int)$index;
+		if($index < 0 || $index >= count($this->docs)){
+			throw new OutOfBoundsException('Invalid seek position');
+		}
+		$this->pos = $index;
+	}
+}