# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is RDFSummary version 1.0.
#
# The Initial Developer of the Original Code is European Environment
# Agency (EEA).  Portions created by CMG are
# Copyright (C) European Environment Agency.  All
# Rights Reserved.
#
# Contributor(s):
# Soren Roug, EEA
# Dominique Dutoit, European Commission
# Tomas Hjelmberg, CMG
# URAGO Keisuke, <bravo@resourcez.org> - for slash code
#
# $Id: RDFSummary.py,v 1.35 2003/08/26 14:22:29 roug Exp $
#
from AccessControl import ClassSecurityInfo
from DateTime import *
import binascii,md5
import operator, string, time
try:
    import urllib2 # Support for authenticated proxy
    ulib2 = 1
except:
    import urllib
    ulib2 = 0
import xmllib
import sys
import Globals
from Globals import Persistent, Acquisition
from webdav.common import rfc1123_date
import AccessControl
import OFS
from Products.ZCatalog.CatalogAwareness import CatalogAware

import pickle, types, os, string
from os.path import join, isfile

# A namespace URI and a local name are glued together with this separator;
# it matches the "<namespace> <name>" key format used in known_elements.
ns_separator = " "
RDFNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
RSS10NS = "http://purl.org/rss/1.0/"

# Qualified names of the attributes that name the subject of a resource
# (looked up by RDFParser.about_id_attr).
RDFABOUT_ATTR = ns_separator.join((RDFNS, "about"))
RDFID_ATTR = ns_separator.join((RDFNS, "ID"))
RSSABOUT_ATTR = ns_separator.join((RSS10NS, "about"))
RSSID_ATTR = ns_separator.join((RSS10NS, "ID"))

# Directory that holds the pickled feed contents of every RDFSummary.
_repos = join(CLIENT_HOME, 'RDFSummary')

# Translation table that turns '/' into '-' (used by _get_new_ufn).
fixslash = string.maketrans('/', '-')

# Maps the qualified name of a feed element ("<namespace> <name>", or the
# bare name for elements without a namespace) to the property name under
# which RDFParser stores its text.  Elements that are missing from this
# table make RDFParser.unknown_endtag raise a RuntimeError.
known_elements = {
# Old Netscape RSS elements version 0.9
  'http://my.netscape.com/rdf/simple/0.9/ title':'title',
  'http://my.netscape.com/rdf/simple/0.9/ description': 'description',
  'http://my.netscape.com/rdf/simple/0.9/ link': 'link',
  'http://my.netscape.com/rdf/simple/0.9/ name': 'name',
  'http://my.netscape.com/rdf/simple/0.9/ url': 'url',
# Netscape RSS elements version 0.91 (No namespace)
  'title':'title',
  'description': 'description',
  'link': 'link',
  'url': 'url',
  'name': 'name',
  'rating':'rating',
  'language':'language',
  'width': 'width',
  'height': 'height',
  'copyright':'copyright',
  'pubDate':'pubdate',
  'lastBuildDate':'lastbuilddate',
  'docs':'docs',
  'managingEditor':'managingeditor',
  'webMaster':'webmaster',
  'hour':'hour',
  'day':'day',
  'skipDays':'skipdays',
  'skipHours':'skiphours',
# Netscape RSS elements version 0.92 (No namespace)
  'cloud':'cloud',
  'source':'source',
  'enclosure':'enclosure',
  'category':'category',
# Base RSS 1.0 elements
  'http://purl.org/rss/1.0/ title':'title',
  'http://purl.org/rss/1.0/ description': 'description',
  'http://purl.org/rss/1.0/ link': 'link',
  'http://purl.org/rss/1.0/ name': 'name',
  'http://purl.org/rss/1.0/ url': 'url',
# Sourceforge abuse of RSS 1.0
  'http://purl.org/rss/1.0/ lastBuildDate':'lastbuilddate',
# RSS 0.91 module for RSS 1.0
  'http://purl.org/rss/1.0/modules/rss091# language':'language',
  'http://purl.org/rss/1.0/modules/rss091# rating':'rating',
  'http://purl.org/rss/1.0/modules/rss091# managingEditor':'managingeditor',
  'http://purl.org/rss/1.0/modules/rss091# webMaster':'webmaster',
  'http://purl.org/rss/1.0/modules/rss091# webmaster':'webmaster',
  'http://purl.org/rss/1.0/modules/rss091# pubDate':'pubdate',
  'http://purl.org/rss/1.0/modules/rss091# lastBuildDate':'lastbuilddate',
  'http://purl.org/rss/1.0/modules/rss091# copyright':'copyright',
  'http://purl.org/rss/1.0/modules/rss091# skipHours':'skiphours',
  'http://purl.org/rss/1.0/modules/rss091# hour':'hour',
  'http://purl.org/rss/1.0/modules/rss091# skipDays':'skipdays',
  'http://purl.org/rss/1.0/modules/rss091# day':'day',
  'http://purl.org/rss/1.0/modules/rss091# width': 'width',
  'http://purl.org/rss/1.0/modules/rss091# height': 'height',
  'http://purl.org/rss/1.0/modules/rss091# description':'description',
# RSS Syndication
  'http://purl.org/rss/1.0/modules/syndication/ updatePeriod': 'updatePeriod',
  'http://purl.org/rss/1.0/modules/syndication/ updateFrequency': 'updateFrequency',
  'http://purl.org/rss/1.0/modules/syndication/ updateBase': 'updateBase',
# Dublin Core
  'http://purl.org/dc/elements/1.1/ title':'title',
  'http://purl.org/dc/elements/1.1/ creator':'creator',
  'http://purl.org/dc/elements/1.1/ subject':'subject',
  'http://purl.org/dc/elements/1.1/ description':'description',
  'http://purl.org/dc/elements/1.1/ publisher':'publisher',
  'http://purl.org/dc/elements/1.1/ contributor':'contributor',
  'http://purl.org/dc/elements/1.1/ date':'date',
  'http://purl.org/dc/elements/1.1/ type':'type',
  'http://purl.org/dc/elements/1.1/ format':'format',
  'http://purl.org/dc/elements/1.1/ identifier':'identifier',
  'http://purl.org/dc/elements/1.1/ source':'source',
  'http://purl.org/dc/elements/1.1/ language':'language',
  'http://purl.org/dc/elements/1.1/ relation':'relation',
  'http://purl.org/dc/elements/1.1/ coverage':'coverage',
  'http://purl.org/dc/elements/1.1/ rights':'rights',
# Obsolete Dublin Core
  'http://purl.org/metadata/dublin_core# Title':'title',
  'http://purl.org/metadata/dublin_core# Creator':'creator',
  'http://purl.org/metadata/dublin_core# Subject':'subject',
  'http://purl.org/metadata/dublin_core# Description':'description',
  'http://purl.org/metadata/dublin_core# Publisher':'publisher',
  'http://purl.org/metadata/dublin_core# Contributor':'contributor',
  'http://purl.org/metadata/dublin_core# Date':'date',
  'http://purl.org/metadata/dublin_core# Type':'type',
  'http://purl.org/metadata/dublin_core# Format':'format',
  'http://purl.org/metadata/dublin_core# Identifier':'identifier',
  'http://purl.org/metadata/dublin_core# Source':'source',
  'http://purl.org/metadata/dublin_core# Language':'language',
  'http://purl.org/metadata/dublin_core# Relation':'relation',
  'http://purl.org/metadata/dublin_core# Coverage':'coverage',
  'http://purl.org/metadata/dublin_core# Rights':'rights',
# My own Event module
  'http://purl.org/rss/1.0/modules/event/ startdate': 'startdate',
  'http://purl.org/rss/1.0/modules/event/ enddate': 'enddate',
  'http://purl.org/rss/1.0/modules/event/ location': 'location',
  'http://purl.org/rss/1.0/modules/event/ type': 'type',
  'http://purl.org/rss/1.0/modules/event/ organizer': 'organizer',
# Slash module (as abused by Newsforge)
  'http://slashcode.com/rss/1.0/modules/Slash/ slash': 'slash',
  'http://slashcode.com/rss/1.0/modules/Slash/ department': 'department',
  'http://slashcode.com/rss/1.0/modules/Slash/ section': 'section',
  'http://slashcode.com/rss/1.0/modules/Slash/ comments': 'comments',
  'http://slashcode.com/rss/1.0/modules/Slash/ hitparade': 'hitparade',
# Slash module (as defined in standard documentation)
  'http://slashcode.com/rss/1.0/modules/slash/ department': 'department',
  'http://slashcode.com/rss/1.0/modules/slash/ section': 'section',
  'http://slashcode.com/rss/1.0/modules/slash/ comments': 'comments',
  'http://slashcode.com/rss/1.0/modules/slash/ hit_parade': 'hitparade',
  }

class RDFSummary (
    CatalogAware,
    Acquisition.Implicit,
    Persistent,
    AccessControl.Role.RoleManager,
    OFS.SimpleItem.Item):
    "Retrieve RDF Summaries from other websites."

    # Declare the security
    #
    # Every declareProtected()/declarePublic() call in this class body is
    # recorded on this object; Globals.InitializeClass(RDFSummary) at the
    # end of the class definition is what puts them into effect.
    security=ClassSecurityInfo()

#   security.setPermissionDefault('Change RDFSummaries',('Manager',))

    # Management tabs of this object: four of its own, followed by the
    # tabs that OFS.SimpleItem.SimpleItem provides.
    manage_options=(
        {'label':'Properties', 'action':'manage_main'},
        {'label':'View', 'action':''},
        {'label':'Update', 'action':'update'},
        {'label':'Source', 'action':'show_source'},
        ) + OFS.SimpleItem.SimpleItem.manage_options

    meta_type = 'RDF Summary'

    security.declareProtected('View', 'index_html')

    # Default view, rendered from the "htmlview" DTML file.
    index_html = Globals.DTMLFile("htmlview", globals())

    security.declareProtected('View', 'show_source')

    # "Source" tab, rendered from the "source" DTML file.
    show_source = Globals.DTMLFile("source", globals())

    security.declareProtected('View management screens', 'manage_main')

    # "Properties" tab, rendered from the "edit_prop" DTML file.
    manage_main = Globals.DTMLFile("edit_prop", globals())

    security.declareProtected('View', 'channel')

    def channel(self):
        """Return the channel dictionary.

        The value is the volatile _v_channel attribute, which is filled by
        update() or read back from the pickle file by _loadpickles().
        """
        channel_info = self._v_channel
        return channel_info

    security.declareProtected('View', 'textinput')

    def textinput(self):
        """Return the text input dictionary.

        The value is the volatile _v_textinput attribute, which is filled by
        update() or read back from the pickle file by _loadpickles().
        """
        textinput_info = self._v_textinput
        return textinput_info

    security.declareProtected('View', 'image')

    def image(self):
        """Return the image dictionary.

        The value is the volatile _v_image attribute; it is an empty
        dictionary as long as no image has been stored.
        """
        image_info = self._v_image
        return image_info

    security.declareProtected('View', 'items')

    def items(self):
        """Return the list of items.

        The value is the volatile _v_items attribute, a list holding one
        dictionary per item collected by the parser.
        """
        item_list = self._v_items
        return item_list

    security.declareProtected('View', 'rdfsource')

    def rdfsource(self):
        """Return the RDF source.

        The value is the volatile _v_rdfsource attribute: the text that
        update() received from the feed URL, or None if there is none.
        """
        source_text = self._v_rdfsource
        return source_text

    security.declarePublic('filename')

    def filename(self):
        """Return the filename.

        This is the name, relative to the repository directory, of the file
        that holds the pickled feed content; it is '' until manage_afterAdd
        has assigned one.
        """
        stored_name = self._filename
        return stored_name

    security.declarePublic('lastupdated')

    def lastupdated(self):
        """Return the date last updated.

        The value is the volatile _v_updatedate attribute: the DateTime set
        by update(), or None when no feed content has been loaded.
        """
        update_date = self._v_updatedate
        return update_date

    def __init__(self, id, title, rssurl, http_proxy, fetchimage):
        """Create a summary that has settings but no feed content yet.

        id, title  -- identifier and title of the object
        rssurl     -- address that update() hands to the parser
        http_proxy -- proxy handed to the parser; may be empty
        fetchimage -- handed to the parser, which downloads the feed image
                      only when the value is "yes"
        """
        # An empty file name signals manage_afterAdd that we're new.
        self._filename = ''

        # Settings.
        self.id, self.title = id, title
        self.rssurl, self.http_proxy = rssurl, http_proxy
        self.fetchimage = fetchimage

        # Feed content; volatile (_v_) and therefore kept in the pickle
        # file instead of the object database.
        self._v_rdfsource = None
        self._v_updatedate = None
        self._v_items = []
        self._v_image = {}
        self._v_channel = {}
        self._v_textinput = {}

    def __setstate__(self,state):
        """Restore the persistent state, then reload the pickled feed content.

        Instances stored by older versions may lack _filename or fetchimage;
        both get a default before the pickles are read.
        """
        Persistent.__setstate__(self, state)

        # backwards compatibility
        has_filename = hasattr(self, "_filename")
        has_fetchimage = hasattr(self, "fetchimage")
        if not has_filename:
            self._filename = self.id
        if not has_fetchimage:
            self.fetchimage = "yes"

        self._loadpickles()

    def _loadpickles(self):
        """Load the feed content from the pickle file into the _v_ attributes.

        The file named by self._filename holds six consecutive pickles:
        update date, textinput, channel, image, items and RDF source.
        When the file is missing, truncated or otherwise unreadable, all
        six attributes are reset to their empty defaults instead, so a
        damaged cache never prevents the object from being loaded.
        """
        # NOTE(review): pickle.load can run arbitrary code; this is only
        # acceptable as long as nothing untrusted can write into _repos.
        loaded = None
        try:
            # Text mode on purpose: update() writes the file with mode 'w'.
            f = open(self.physicalpath(self._filename), 'r')
            try:
                # Read everything first so that a failure half-way through
                # cannot leave a mix of old and new values behind.
                loaded = [pickle.load(f) for unused in range(6)]
            finally:
                # The handle used to leak whenever a load raised.
                f.close()
        except (EOFError, SystemError, IOError, ValueError, KeyError,
                IndexError, pickle.UnpicklingError):
            loaded = None

        if loaded is None:
            loaded = [None, {}, {}, {}, [], None]

        (self._v_updatedate, self._v_textinput, self._v_channel,
         self._v_image, self._v_items, self._v_rdfsource) = loaded

    security.declareProtected('Change RDFSummaries', 'manage_edit')

    def manage_edit(self, title, rssurl, http_proxy, updateonchg='off',
                    fetchimage='no', REQUEST=None):
        """Store new settings for the summary.

        With updateonchg equal to "on" the object is reindexed and the
        result of update(REQUEST) is returned.  Otherwise a confirmation
        dialog is returned when a REQUEST was passed, and None when not.
        """
        self.title, self.rssurl = title, rssurl
        self.http_proxy, self.fetchimage = http_proxy, fetchimage

        if updateonchg == "on":
            self.reindex_object()
            return self.update(REQUEST)

        if REQUEST is None:
            return None

        confirmation = ('Content of <strong>%s</strong> has been edited.' %
                        (self.id))
        return Globals.MessageDialog(
            title='Edited',
            message=confirmation,
            action ='manage_main',
            )

    ################################
    ##        Public methods       #
    ################################

    security.declareProtected('View', 'update')

    def update(self,REQUEST=None):
        """Call this function to get it to update its content.

        Fetches and parses self.rssurl, keeps the result in the volatile
        attributes and writes it to the pickle file; the previous file is
        kept as "<file>.undo".

        Returns an error dialog when the feed has no channel, a
        confirmation dialog when a REQUEST was passed, and None otherwise.
        Raises OSError when the repository directory cannot be created;
        errors raised while fetching or parsing are passed on.
        """
        # make the directories
        if not os.path.isdir(_repos):
            try:
                os.makedirs(_repos)
            except OSError:
                raise OSError('Can\'t create directory %s' % _repos)

        p = RDFParser(self.http_proxy, self.fetchimage)
        p.parse_url(self.rssurl, REQUEST)

        # Only the channel is checked here; without one the old content
        # (including its update date) is left untouched.
        if p.channel == {}:
            return Globals.MessageDialog(
            title='Error',
            message='Error updating <strong>%s</strong>.<br> channel element is required.' % self.id,
            action ='manage_main',
            )

        # Stamp the date only now: a failed fetch or an unusable feed must
        # not make lastupdated() claim that the content is fresh.
        self._v_updatedate = DateTime()
        self._v_textinput = p.textinput
        self._v_channel = p.channel
        self._v_image = p.image
        self._v_items = p.items
        self._v_rdfsource = p.rdfsource

        fn = self.physicalpath(self._filename)
        try:
            os.rename(fn, fn+'.undo')
        except OSError:
            # No previous file to keep; nothing to do.
            pass

        # write object; the order must match the reads in _loadpickles
        f = open(fn, 'w')
        try:
            for part in (self._v_updatedate, self._v_textinput,
                         self._v_channel, self._v_image,
                         self._v_items, self._v_rdfsource):
                pickle.dump(part, f)
        finally:
            f.close()

        if REQUEST is not None:
            return Globals.MessageDialog(
            title='Updated',
            message='Content of <strong>%s</strong> has been updated.' %
            (self.id), action ='manage_main',
            )

    security.declareProtected('View', 'picture')

    def picture(self):
        """ Make an img element that displays the picture.

        Returns the HTML for the image, wrapped in a link when the image
        has a 'link' property, or None when no image data is stored.
        The link and title come from the remote feed, so they are escaped
        before they are placed inside attribute values.
        """
        image = self._v_image
        if not image.has_key('data'):
            return None

        def attr_value(value):
            # '&' is left alone on purpose: RDFParser keeps character and
            # entity references in their '&...;' form, so escaping it here
            # would show them literally.
            for char, entity in (('<', '&lt;'), ('>', '&gt;'),
                                 ('"', '&quot;')):
                value = value.replace(char, entity)
            return value

        lstart = ''
        lend = ''
        if image.has_key('link'):
            lstart = '<a href="%s">' % attr_value(image['link'])
            lend = '</a>'
        alt = ''
        if image.has_key('title'):
            alt = 'alt="%s" ' % attr_value(image['title'])
        return ('%s<img src="%s/view_image" border="0" %s/>%s' %
                ( lstart, self.absolute_url(), alt, lend ))

    security.declareProtected('View', 'view_image')

    def view_image(self, REQUEST, RESPONSE):
        """
        The default view of the contents of an Image.

        Returns the contents of the file or image.  Also, sets the
        Content-Type HTTP header to the objects content type.

        Answers with status 404 and an empty body when no image data is
        stored, and with status 304 when the If-Modified-Since header
        shows that the client copy is still current.
        """
        image = self._v_image
        if not image.has_key('data'):
            # 'content_type' and 'size' are stored together with 'data';
            # without them the lookups below would raise a KeyError.
            RESPONSE.setStatus(404)
            return ''

        # Set header values in every case since apache caching will return
        # Content-Length of 0 in response if size is not set here
        RESPONSE.setHeader('Last-Modified', rfc1123_date(self._p_mtime))
        RESPONSE.setHeader('Content-Type', image['content_type'])
        RESPONSE.setHeader('Content-Length', image['size'])

        # HTTP If-Modified-Since header handling.
        header=REQUEST.get_header('If-Modified-Since', None)
        if header is not None:
            header=string.split(header, ';')[0]
            # Some proxies seem to send invalid date strings for this
            # header. If the date string is not valid, we ignore it
            # rather than raise an error to be generally consistent
            # with common servers such as Apache (which can usually
            # understand the screwy date string as a lucky side effect
            # of the way they parse it).  The except clause is left
            # unrestricted on purpose for that reason.
            try:    mod_since=long(DateTime(header).timeTime())
            except: mod_since=None
            last_mod = long(0)
            if mod_since is not None and self._p_mtime:
                last_mod = long(self._p_mtime)
            if last_mod > 0 and last_mod <= mod_since:
                RESPONSE.setStatus(304)
                return ''

        data = image['data']
        if type(data) is type(''): return data

        # Anything else is treated as a chain of chunks linked by .next.
        while data is not None:
            RESPONSE.write(data.data)
            data=data.next

        return ''

    ################################
    ##       Private methods       #
    ################################

    def _copy(self, infile, outfile):
        """ read binary data from infile and write it to outfile
            infile and outfile may be strings, in which case a file with that
            name is opened, or filehandles, in which case they are accessed
            directly.

            Files opened here are always closed again, also on failure.
            A filehandle passed as infile is left open and rewound to its
            start when that is possible.  Raises IOError, naming this
            object and the file(s) involved, when opening or copying fails.
        """
        if type(infile) is types.StringType:
            try:
                instream = open(infile, 'rb')
            except IOError:
                # Give _undo() a chance to bring the file back, then retry.
                self._undo()
                try:
                    instream = open(infile, 'rb')
                except IOError:
                    raise IOError("%s (%s)" % (self.id, infile))
            close_in = 1
        else:
            instream = infile
            close_in = 0

        outstream = None
        close_out = 0
        try:
            if type(outfile) is types.StringType:
                try:
                    outstream = open(outfile, 'wb')
                except IOError:
                    raise IOError("%s (%s)" % (self.id, outfile))
                close_out = 1
            else:
                outstream = outfile

            blocksize = 2<<16
            try:
                block = instream.read(blocksize)
                outstream.write(block)
                # A short read means the end of the input was reached.
                while len(block) == blocksize:
                    block = instream.read(blocksize)
                    outstream.write(block)
            except IOError:
                # This used to format an undefined name, which turned every
                # copy failure into a NameError.
                raise IOError("%s (%s -> %s)" % (self.id, infile, outfile))

            if not close_in:
                # Best effort: not every file-like object can seek.
                try:
                    instream.seek(0)
                except (AttributeError, IOError, OSError, ValueError):
                    pass
        finally:
            if close_in:
                instream.close()
            if close_out:
                outstream.close()


    def _undo(self):
        """ Restore the pickle file after undo or copy-paste.

        If the file is gone but its "<file>.undo" backup exists, the backup
        is renamed back; the pickles are then reloaded.  Nothing happens
        for an object that has no file name yet.
        """
        if self._filename != '':
            target = self.physicalpath(self._filename)
            backup = target + '.undo'
            if isfile(backup) and not isfile(target):
                os.rename(backup, target)
            self._loadpickles()

    def _get_new_ufn(self):
        """ Create a new unique filename from the object's URL.

            The name is the base64 form of the MD5 digest of
            self.absolute_url(1).  The base64 set of characters (rfc1341)
            includes the / character, which cannot be part of a file name
            on UNIX systems, so it is replaced by '-'; the newline that
            ends the base64 text is dropped.
        """
        digest = md5.new(self.absolute_url(1)).digest()
        encoded = binascii.b2a_base64(digest)
        return encoded.translate(fixslash, '\r\n')

    def physicalpath(self, filename=''):
        """ Return the full path of filename inside the repository.

            filename is either one name or a list of path components that
            are joined onto _repos one after the other; for '' (and for an
            empty list) the repository directory itself is returned.
        """
        if type(filename) == types.ListType:
            return join(_repos, *filename)
        if filename == '':
            return _repos
        return join(_repos, filename)

    ################################
    ## Special management methods  #
    ################################

    def manage_afterAdd(self, item, container, new_fn=None):
        """ This method is called, whenever _setObject in ObjectManager gets
        called. This is the case after a normal add and if the object is a
        result of cut-paste- or rename-operation.

        If it is a fresh add, then we don't want to load obsolete pickles from
        an old object with the same name, but if it is a cut-n-paste job, then
        the new object should load the pickles.

        new_fn -- file name to use; by default one is derived from the
                  object's URL.
        """
        new_fn = new_fn or self._get_new_ufn()
        new_path = self.physicalpath(new_fn)

        if self._filename != '':
            # Moved, renamed or copied object: take the content along.
            old_path = self.physicalpath(self._filename)
            if old_path != new_path:
                # Copying a file onto itself would truncate it.
                if isfile(old_path):
                    self._copy(old_path, new_path)
                elif isfile(old_path+'.undo'):
                    self._copy(old_path+'.undo', new_path)
            # Point at the new file before loading: the old one may exist
            # only as ".undo", and loading from it would wipe the content.
            self._filename = new_fn
            self._loadpickles()
        else:
            # Fresh add: drop a stale file left behind by an earlier object.
            # The path must be the one inside the repository, not the bare
            # name relative to the working directory.
            try:
                os.unlink(new_path)
            except OSError:
                pass
            self._filename = new_fn

        return RDFSummary.inheritedAttribute ("manage_afterAdd") \
                (self, item, container)

    def manage_beforeDelete(self, item, container):
        """ Called when the object is deleted, which also happens when it
        is moved (cut-paste) or renamed.  To support undo the external file
        is not deleted: it replaces any earlier "<file>.undo" and stays in
        the repository under that name until it is deleted manually.
        """
        live = self.physicalpath(self._filename)
        backup = live + '.undo'

        # Make room for the new backup; there may be none to remove.
        try:
            os.unlink(backup)
        except OSError:
            pass

        # The live file may be missing as well; that is not an error.
        try:
            os.rename(live, backup)
        except OSError:
            pass

        inherited = RDFSummary.inheritedAttribute("manage_beforeDelete")
        return inherited(self, item, container)

    def manage_undo_transactions(self, transaction_info, REQUEST=None):
        """ Called when the user has chosen an Undo-action.  The external
        file is renamed back from "<file>.undo" to its real name and the
        pickles are reloaded; when there is no backup nothing is restored.
        The inherited undo is carried out in either case.
        """
        live = self.physicalpath(self._filename)
        backup = live + '.undo'
        try:
            os.rename(backup, live)
            self._loadpickles()
        except OSError:
            pass
        inherited = RDFSummary.inheritedAttribute("manage_undo_transactions")
        return inherited(self, transaction_info, REQUEST)

# Initialize the class so that the security assertions are taken into account
#
Globals.InitializeClass(RDFSummary)

def manage_addRDFSummary(self, id, title, rssurl, http_proxy, fetchimage, REQUEST=None):
    """Create a summary and install it in its parent Folder.

    The argument 'self' will be bound to the parent Folder.  When a
    REQUEST is given the folder's manage_main page is returned.
    """
    self._setObject(id, RDFSummary(id, title, rssurl, http_proxy, fetchimage))
    if REQUEST is None:
        return None
    return self.manage_main(self, REQUEST)

# Form for adding an RDF Summary, rendered from the "add_summary" DTML file.
manage_addRDFSummaryForm = Globals.DTMLFile('add_summary', globals())

# I could not figure out how to inherit from xmllib in RDFSummary without
# getting a TypeError. Since it is also not possible to pickle the content
# of elements (references to methods), I decided to isolate the XML parsing
# in its own class.

class RDFParser(xmllib.XMLParser):
    """Parse an RSS file"""

    def __init__(self, proxy, fetchimage):
        """Set up empty results and the table of element handlers.

        proxy      -- HTTP proxy to fetch through; a false value means none
        fetchimage -- the feed image is downloaded only when this is "yes"
        """
        # Results that the caller reads after parse_url().
        self.channel = {}
        self.textinput = {}
        self.image = {}
        self.items = []
        self.encoding = 'UTF-8'

        # Settings.
        self.fetchimage = fetchimage
        self._proxies = {}
        if proxy:
            self._proxies['http'] = proxy

        # Parsing state.  _rssver is switched to 91 by start_rss91.
        self.inside_channel = 0
        self._rssver = 100
        self.__item = {}
        self.__data = []
        self.__count = 0

        ignored = (self.ignore_tag, self.ignore_tag)
        containers = {
            'item': (self.start_item, self.end_item),
            'image': (self.start_image, self.end_image),
            'channel': (self.start_channel, self.end_channel),
            'textinput': (self.start_textinput, self.end_textinput),
            }

        elements = {}
        # RSS 1.0, Netscape RSS 0.9 and RSS 0.91 (no namespace) share the
        # same container elements.
        for prefix in ('http://purl.org/rss/1.0/ ',
                       'http://my.netscape.com/rdf/simple/0.9/ ',
                       ''):
            for name, handlers in containers.items():
                elements[prefix + name] = handlers
        elements['http://purl.org/rss/1.0/ items'] = ignored
        # RDF
        for name in ('RDF', 'Seq', 'Bag', 'Alt', 'li'):
            elements['http://www.w3.org/1999/02/22-rdf-syntax-ns# ' + name] = ignored
        # RSS 0.91
        elements['rss'] = (self.start_rss91, self.ignore_tag)
        # For the benefit of my.userland.com
        elements['textInput'] = containers['textinput']
        self.elements = elements

        xmllib.XMLParser.__init__(self)

# Most tags don't have special handlers. We only recognize the
# <item>, <channel>, <image> and <textinput> tags; the tags found inside
# those get converted to properties of the enclosing handler tag.

    def handle_xml(self,encoding,standalone):
        """Remember the encoding passed for the XML declaration, if any."""
        if not encoding:
            return
        self.encoding = encoding

    def unknown_starttag(self, tag, attrs):
        """Start collecting the text of an element listed in known_elements.

        Elements that are not listed are ignored here.
        """
        if not known_elements.has_key(tag):
            return
        self.__tag = known_elements[tag]
        self.__data = []

    def unknown_endtag(self, tag):
        """Store the collected text as a property of the current item.

        Raises RuntimeError for an element that is not in known_elements.
        """
        if not known_elements.has_key(tag):
            raise RuntimeError("Unsupported tag: " + tag)
        # Use the name of the element being closed, not the name of the
        # element opened last: the two differ when known elements are
        # nested, and the text then ended up under the inner one's name.
        self.__item[known_elements[tag]] = string.join(self.__data, "")

    def ignore_tag(self,attrs=None):
        """Do nothing; serves as start and as end handler of ignored tags."""
        return None

    def handle_data(self, text):
        """Add a piece of character data to the text collected so far."""
        collected = self.__data
        collected.append(text)

    def handle_cdata(self, text):
        """Add the content of a CDATA section to the text collected so far."""
        collected = self.__data
        collected.append(text)

    def handle_charref(self,ref):
        """Keep a character reference as text, in its '&#...;' form."""
        reference = ''.join(('&#', ref, ';'))
        self.handle_data(reference)

    def unknown_entityref(self,ref):
        """Keep an unknown entity reference as text, in its '&...;' form."""
        reference = ''.join(('&', ref, ';'))
        self.handle_data(reference)

    def syntax_error(self,message):
        """Ignore syntax errors; the message is discarded."""
        return None

    def start_channel(self, attrs):
        """Start of channel info: discard earlier results and begin anew."""
        self.channel, self.image = {}, {}
        self.items = []
        self.__item = {}
        self.inside_channel = 1

    def end_channel(self):
        """End of channel info: the collected properties become the channel.

        The document encoding is added under the key 'encoding'.
        """
        channel = self.__item
        channel['encoding'] = self.encoding
        self.channel = channel
        self.inside_channel = 0

    def start_textinput(self, attrs):
        """Begin a fresh property dictionary for the textinput element.

        For RSS 0.91 the properties collected so far are set aside first.
        """
        collected = self.__item
        self.__item = {}
        if self._rssver == 91:
            self.__itemsave = collected

    def end_textinput(self):
        """Store the collected properties as the textinput.

        For RSS 0.91 the properties that were set aside are put back.
        """
        self.textinput = self.__item
        if self._rssver != 91:
            return
        self.__item = self.__itemsave

    def about_id_attr(self,attrs):
        """Store the subject of the current item under 'rdfsubject'.

        An "about" attribute (RDF namespace first, then RSS 1.0) wins over
        an "ID" attribute.  An empty "about" stands for the feed URL, one
        that starts with "#" is appended to it, and an "ID" is appended
        after a "#".  Without any of these attributes nothing is stored.
        """
        about = None
        for name in (RDFABOUT_ATTR, RSSABOUT_ATTR):
            if attrs.has_key(name):
                about = attrs[name]
                break

        if about is not None:
            if about == "":
                subject = self.baseurl
            elif about[0] == "#":
                subject = self.baseurl + about
            else:
                subject = about
            self.__item["rdfsubject"] = subject
            return

        for name in (RDFID_ATTR, RSSID_ATTR):
            if attrs.has_key(name):
                self.__item["rdfsubject"] = self.baseurl + "#" + attrs[name]
                return

    def start_item(self, attrs):
        """Begin a fresh property dictionary for an item.

        For RSS 0.91 the properties collected so far are set aside first.
        The subject of the item is taken from its attributes.
        """
        collected = self.__item
        self.__item = {}
        if self._rssver == 91:
            self.__itemsave = collected
        self.about_id_attr(attrs)

    def end_item(self):
        """Append the collected properties to the list of items.

        For RSS 0.91 the properties that were set aside are put back.
        """
        finished = self.__item
        self.items.append(finished)
        if self._rssver != 91:
            return
        self.__item = self.__itemsave

    def start_image(self, attrs):
        """Begin a fresh property dictionary for the image element.

        Outside RSS 0.91 an image element inside the channel is skipped.
        For RSS 0.91 the properties collected so far are set aside first.
        """
        is_rss91 = self._rssver == 91
        if not is_rss91 and self.inside_channel:
            return
        if is_rss91:
            self.__itemsave = self.__item
        self.__item = {}

    def end_image(self):
        """
        End of the image element: fetch the image file into a data string.

        Outside RSS 0.91 an image element inside the channel is skipped.
        Nothing is fetched unless fetchimage is "yes" and the image has a
        'url' property.  On success self.image holds the image properties
        plus 'data', 'content_type' and 'size'.  Errors raised while
        fetching are passed on.
        """
        if self._rssver != 91 and self.inside_channel:
            return

        image = self.__item
        # restore the itemstore to how it was before <image> if RSS 0.91.
        # This has to happen on every path: when the image was not fetched
        # the channel properties collected before <image> used to be lost.
        if self._rssver == 91:
            self.__item = self.__itemsave

        if self.fetchimage != "yes":
            return
        if not image.has_key('url'):
            # Nothing to fetch; this used to raise a KeyError.
            return

        # NOTE(review): the URL comes from the remote feed and its scheme is
        # not restricted here -- consider allowing http and https only.
        if ulib2 == 1:
            proxy_support = urllib2.ProxyHandler(self._proxies)
            opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler)
            # Same identification as in the urllib branch below.
            opener.addheaders = [
                ("User-agent", "RDFSummary (helpdesk@eionet.eu.int)")]
            # Use the opener directly; installing it would change the proxy
            # settings of every other urllib2 user in this process.
            f = opener.open(image['url'])
        else:
            u = urllib.URLopener(proxies=self._proxies)
            if not u:
                raise IOError("Unsupported protocol")
            u.addheader("User-agent", "RDFSummary (helpdesk@eionet.eu.int)")
            f = u.open(image['url'])

        if not f:
            raise IOError("Failure in open")
        try:
            data = f.read()
            headers = f.info()
        finally:
            f.close()

        if headers.has_key('content-type'):
            ctype = headers['content-type']
        else:
            ctype = 'image/gif'

        # now to import the image data
        image['data'] = data
        image['content_type'] = ctype
        image['size'] = len(data)
        self.image = image

    def parse_url(self, url, REQUEST):
        """
        Grab the file from the webserver and feed it to the parser.

        url     -- address of the feed; also kept as self.baseurl, the base
                   for relative subjects
        REQUEST -- not used

        The text received is kept in self.rdfsource.  Raises IOError when
        the URL cannot be opened and RuntimeError when the response is
        empty.
        """
        self.baseurl = url
        if ulib2 == 1:
            proxy_support = urllib2.ProxyHandler(self._proxies)
            opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler)
            # Same identification as in the urllib branch below.
            opener.addheaders = [
                ("User-agent", "RDFSummary (helpdesk@eionet.eu.int)")]
            # Use the opener directly; installing it would change the proxy
            # settings of every other urllib2 user in this process.
            f = opener.open(url)
        else:
            u = urllib.URLopener(proxies=self._proxies)
            if not u:
                raise IOError("Unsupported protocol")
            u.addheader("User-agent", "RDFSummary (helpdesk@eionet.eu.int)")
            f = u.open(url)

        if not f:
            raise IOError("Failure in open")
        try:
            self.rdfsource = f.read()
        finally:
            # The response used to be left open.
            f.close()

        if not self.rdfsource:
            raise RuntimeError("Unable to GET content")
        self.feed(self.rdfsource)

# RSS 0.91 support
    def start_rss91(self, attrs):
        """Handle the <rss> start tag: switch the parser to RSS 0.91 mode."""
        rss_091 = 91
        self._rssver = rss_091
