.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the same terms as PHP.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * @author Chuck Hagenbuch
 * @version $Revision: 1.10 $
 * @since Jonah 0.0.1
 */
class Jonah_headlines {

    // {{{ properties

    /** Boolean indicating whether or not we should shout about
        unrecognized tags, or just go on. */
    var $debug = false;

    /** The Resource ID of the xml parser. */
    var $parser;

    /** The filehandle of the data file being parsed. */
    var $fp;

    /** The current parent tag - CHANNEL, STORY, etc. */
    var $parent = '';

    /** The current child tag - TITLE, DESCRIPTION, URL, etc.
        Empty while no recognized child element is open. */
    var $child = '';

    /** All the attributes of the channel description. */
    var $channel = array();

    /** All the attributes of the channel image. */
    var $image = array();

    /** All the attributes of the current story being parsed. */
    var $story = array();

    /** All the attributes of the current item being parsed. */
    var $item = array();

    /** The array that all the parsed information gets dumped into. */
    var $structure;

    /** Holds an error string, if any. */
    var $error;

    /** Stack of the container tags enclosing the current one, so the
        previous container can be restored when a nested one closes. */
    var $_parents = array();

    // }}}
    // {{{ constructor

    /**
     * Constructs a new Jonah_headlines parser object.
     *
     * This is a PHP 4 style constructor. On PHP 8 a method named after
     * its class is an ordinary method and is not run by "new", which is
     * why the properties above also carry array() defaults: the object
     * starts in the same state either way.
     */
    function Jonah_headlines () {
        // initialize some variables
        $this->channel = array();
        $this->image   = array();
        $this->item    = array();
        $this->story   = array();
    }

    // }}}

    /**
     * Initialize the XML parser.
     *
     * Creates the parser, registers this object's handlers on it and
     * opens the input file. On failure to open the file, $this->error
     * is set.
     *
     * @param $file  The XML file to parse as an RSS/RDF file. This can
     *               be a local file, or a remote file if you have the
     *               fopen wrappers enabled.
     *
     * @return false if the input file could not be opened; nothing
     *         otherwise.
     */
    function init($file) {
        // every document starts outside of any container or field, even
        // if a previous parse was aborted half way through
        $this->parent   = '';
        $this->child    = '';
        $this->_parents = array();

        // create the XML parser
        $this->parser = xml_parser_create();
        xml_set_object($this->parser, $this);
        // case folding is what makes the handlers see upper-case names
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, true);
        xml_set_element_handler($this->parser, 'start_element', 'end_element');
        xml_set_character_data_handler($this->parser, 'character_data');
        xml_set_default_handler($this->parser, 'default_handler');

        // i think i can safely disable these
        xml_set_processing_instruction_handler($this->parser, '');
        xml_set_external_entity_ref_handler($this->parser, '');

        // open the input file; failure is reported through $this->error
        // rather than through a PHP warning
        if (!($this->fp = @fopen($file, 'r'))) {
            $this->error = 'Could not open input file';
            return false;
        }
    }

    /**
     * Clean up any existing data - reset to a state where we can
     * cleanly open a new file.
     */
    function cleanup() {
        $this->channel   = array();
        $this->image     = array();
        $this->item      = array();
        $this->story     = array();
        $this->structure = array();
    }

    /**
     * Actually do the parsing. Separated from the constructor just in
     * case you want to set any other options on the parser, load
     * initial data, whatever.
     *
     * The parser and the file handle are released on every exit path.
     *
     * @param $file  The XML file to parse as an RSS/RDF file. This can
     *               be a local file, or a remote file if you have the
     *               fopen wrappers enabled.
     *
     * @return false on failure (with $this->error set); nothing on
     *         success. Results are left in $this->structure.
     */
    function parse ($file) {
        $this->init($file);

        // sanity checks
        if (!$this->parser) {
            $this->error = 'Could not find xml parser handle';
            $this->_release();
            return false;
        }
        if (!$this->fp) {
            $this->error = 'Could not find input file handle';
            $this->_release();
            return false;
        }

        // parse away; compare against '' explicitly so that a chunk
        // consisting of the single character "0" does not end the loop
        $fed = false;
        while (($data = fread($this->fp, 4096)) !== false && $data !== '') {
            $fed = true;
            if (!xml_parse($this->parser, $data, false)) {
                return $this->_fail();
            }
        }

        // Always tell the parser that the document is complete. feof()
        // is not yet true after a read that ends exactly at the end of
        // the file, so relying on it could skip the final call and let a
        // truncated document through unnoticed. An input that yielded no
        // data at all is left alone, as before.
        if ($fed && !xml_parse($this->parser, '', true)) {
            return $this->_fail();
        }

        // mop up
        $this->_release();
    }

    /**
     * Record the parser's current error in $this->error, release the
     * parser and file handle, and return false.
     */
    function _fail () {
        $this->error = sprintf('XML error: %s at line %d',
                               xml_error_string(xml_get_error_code($this->parser)),
                               xml_get_current_line_number($this->parser));
        $this->_release();
        return false;
    }

    /**
     * Free the XML parser and close the input file, if they exist.
     */
    function _release () {
        if ($this->parser) {
            xml_parser_free($this->parser);
            // forget the handle so it cannot be freed twice
            $this->parser = null;
        }
        if (is_resource($this->fp)) {
            fclose($this->fp);
        }
    }

    /**
     * Start collecting data about a new element.
     *
     * @param $parser   The XML parser calling the handler.
     * @param $name     The (upper-cased) element name.
     * @param $attribs  The element's attributes; not used.
     */
    function start_element ($parser, $name, $attribs) {
        switch ($name) {
        case 'CHANNEL':
        case 'IMAGE':
        case 'ITEM':
        case 'STORY':
        case 'TEXTINPUT':
            // remember the enclosing container (for instance the CHANNEL
            // around an IMAGE) so end_element() can switch back to it
            $this->_parents[] = $this->parent;
            $this->parent = $name;
            break;

        case 'LINUXTODAY':
        case 'STORYLIST':
            $this->structure['type'] = strtolower($name);
            break;

        case 'TITLE':
        case 'RATING':
        case 'DESCRIPTION':
        case 'URL':
        case 'LINK':
        case 'WIDTH':
        case 'HEIGHT':
        case 'LANGUAGE':
        case 'NAME':
        case 'MANAGINGEDITOR':
        case 'WEBMASTER':
        case 'COPYRIGHT':
        case 'PUBDATE':
        case 'LASTBUILDDATE':
        // stuff for LinuxToday
        case 'TIME':
        case 'AUTHOR':
        case 'TOPIC':
        case 'COMMENTS':
        // userland story format
        case 'POSTTIME':
        case 'CHANNELTITLE':
        case 'CHANNELURL':
        case 'USERLANDCHANNELID':
        case 'STORYTEXT':
            $this->child = strtolower($name);
            break;

        case 'RDF:RDF':
        case 'RSS':
            // rss version? can ignore...
            break;

        default:
            // $this->child is left untouched on purpose: if a recognized
            // field is open, text inside this unknown element still
            // belongs to it
            if ($this->debug) {
                echo 'found something unexpected: ' . htmlspecialchars($name) . ". fix this.\n\n";
            }
            break;
        }
    }

    /**
     * Handle the ends of XML elements - wrap up whatever we've been
     * putting together and store it for safekeeping.
     *
     * @param $parser  The XML parser calling the handler.
     * @param $name    The (upper-cased) element name.
     */
    function end_element ($parser, $name) {
        switch ($name) {
        case 'CHANNEL':
            $this->structure['channel'] = $this->channel;
            $this->_leave_parent();
            break;

        case 'IMAGE':
            $this->structure['image'] = $this->image;
            $this->_leave_parent();
            break;

        case 'STORY':
            $this->structure['stories'][] = $this->story;
            $this->story = array();
            $this->_leave_parent();
            break;

        case 'TEXTINPUT':
            $this->item['textinput'] = true;
            // deliberate fall through: a text input is stored in the
            // item list, flagged by the 'textinput' key set above

        case 'ITEM':
            $this->structure['items'][] = $this->item;
            $this->item = array();
            $this->_leave_parent();
            break;

        default:
            // The field being collected has ended; stop collecting so
            // that text of a following unrecognized element is not
            // appended to it.
            if ($this->child !== '' && strtolower($name) === $this->child) {
                $this->child = '';
            }
            break;
        }
    }

    /**
     * Switch back to the container that encloses the one just closed,
     * or to no container at all when there is none.
     */
    function _leave_parent () {
        if (count($this->_parents) > 0) {
            $this->parent = array_pop($this->_parents);
        } else {
            $this->parent = '';
        }
    }

    /**
     * The handler for character data encountered in the XML file.
     *
     * Data is appended to the currently open field of the current
     * container. Chunks without any non-whitespace character, and
     * chunks arriving while no recognized field is open, are ignored.
     *
     * @param $parser  The XML parser calling the handler.
     * @param $data    The chunk of character data.
     */
    function character_data ($parser, $data) {
        if ($this->child === '' || !preg_match('|\S|', $data)) {
            return;
        }

        switch ($this->parent) {
        case 'CHANNEL':
            $this->_append($this->channel, $data);
            break;

        case 'IMAGE':
            $this->_append($this->image, $data);
            break;

        case 'STORY':
            $this->_append($this->story, $data);
            break;

        default:
            // ITEM, TEXTINPUT and anything outside a known container
            $this->_append($this->item, $data);
            break;
        }
    }

    /**
     * Append $data to the entry for the current field in $bucket,
     * creating the entry on first use. The parser may deliver the text
     * of one element in several chunks, hence the concatenation.
     *
     * @param $bucket  The array collecting the current container's
     *                 fields (passed by reference).
     * @param $data    The chunk of character data to add.
     */
    function _append (&$bucket, $data) {
        if (isset($bucket[$this->child])) {
            $bucket[$this->child] .= $data;
        } else {
            $bucket[$this->child] = $data;
        }
    }

    /**
     * Handles things that we don't recognize. A no-op.
     */
    function default_handler ($parser, $data) {
        // no-op
    }

}
?>