2 # -*- coding: UTF-8 -*-
"""
Between runs of Planet we need somewhere to store the feed information
we parsed; this is so we don't lose information when a particular feed
goes away or is too short to hold enough items.

This module provides the code to handle this cache transparently enough
that the rest of the code can take the persistence for granted.
"""
# Regular expressions to sanitise cache filenames; filename() applies them
# in the order they are defined here.
# Matches a leading run of non-colon characters followed by "://", i.e. a
# URL scheme prefix at the very start of the string.
re_url_scheme = re.compile(r'^[^:]*://')
# Matches each run of "/" and "?" characters (filename() turns a run into ",").
re_slash = re.compile(r'[?/]+')
# Matches any "," and "." characters at the very start of the string.
re_initial_cruft = re.compile(r'^[,.]*')
# Matches any "," and "." characters at the very end of the string.
re_final_cruft = re.compile(r'[,.]*$')
"""Cached information.

This class is designed to hold information that is stored in a cache
between instances.  It can act both as a dictionary (c['foo']) and
as an object (c.foo) to get and set values, and supports both string
and date values.

If you wish to support special fields you can derive a class from this
one and implement get_FIELD and set_FIELD functions, which will be
called automatically.
"""
40 def __init__(self, cache, id_, root=0):
46 self._id = id_.replace(" ", "%20")
49 def cache_key(self, key):
50 """Return the cache key name for the given key."""
51 key = key.replace(" ", "_")
55 return self._id + " " + key
58 """Read information from the cache."""
64 if self._cache.has_key(keys_key):
65 keys = self._cache[keys_key].split(" ")
70 cache_key = self.cache_key(key)
71 if not self._cached.has_key(key) or self._cached[key]:
72 # Key either hasn't been loaded, or is one for the cache
73 self._value[key] = self._cache[cache_key]
74 self._type[key] = self._cache[cache_key + " type"]
77 def cache_write(self, sync=1):
78 """Write information to the cache."""
79 self.cache_clear(sync=0)
82 for key in self.keys():
83 cache_key = self.cache_key(key)
84 if not self._cached[key]:
85 if self._cache.has_key(cache_key):
86 # Non-cached keys need to be cleared
87 del(self._cache[cache_key])
88 del(self._cache[cache_key + " type"])
92 self._cache[cache_key] = self._value[key]
93 self._cache[cache_key + " type"] = self._type[key]
100 self._cache[keys_key] = " ".join(keys)
104 def cache_clear(self, sync=1):
105 """Remove information from the cache."""
111 if self._cache.has_key(keys_key):
112 keys = self._cache[keys_key].split(" ")
113 del(self._cache[keys_key])
118 cache_key = self.cache_key(key)
119 del(self._cache[cache_key])
120 del(self._cache[cache_key + " type"])
def has_key(self, key):
    """Check whether the key exists.

    Spaces in ``key`` are replaced by underscores before the lookup, which
    is the same normalisation the setter methods apply when storing a key.

    Returns True when the normalised key is present in ``self._value`` and
    False otherwise.
    """
    # The ``in`` operator replaces the deprecated dict.has_key(), which no
    # longer exists in Python 3; it yields the same boolean on Python 2.
    return key.replace(" ", "_") in self._value
def key_type(self, key):
    """Look up the type name recorded for a key.

    Spaces in ``key`` are replaced by underscores first; the result is
    whatever ``self._type`` holds under that normalised name.
    """
    normalised = key.replace(" ", "_")
    return self._type[normalised]
135 def set(self, key, value, cached=1):
136 """Set the value of the given key.
138 If a set_KEY function exists that is called otherwise the
139 string function is called and the date function if that fails
140 (it nearly always will).
142 key = key.replace(" ", "_")
145 func = getattr(self, "set_" + key)
146 except AttributeError:
149 return func(key, value)
152 return self.set_as_null(key, value)
155 return self.set_as_string(key, value)
157 return self.set_as_date(key, value)
160 """Return the value of the given key.
162 If a get_KEY function exists that is called otherwise the
163 correctly typed function is called if that exists.
165 key = key.replace(" ", "_")
168 func = getattr(self, "get_" + key)
169 except AttributeError:
175 func = getattr(self, "get_as_" + self._type[key])
176 except AttributeError:
181 return self._value[key]
183 def set_as_string(self, key, value, cached=1):
184 """Set the key to the string value.
186 The value is converted to UTF-8 if it is a Unicode string, otherwise
187 it's assumed to have failed decoding (feedparser tries pretty hard)
188 so has all non-ASCII characters stripped.
192 key = key.replace(" ", "_")
193 self._value[key] = value
194 self._type[key] = self.STRING
195 self._cached[key] = cached
197 def get_as_string(self, key):
198 """Return the key as a string value."""
199 key = key.replace(" ", "_")
200 if not self.has_key(key):
203 return self._value[key]
def set_as_date(self, key, value, cached=1):
    """Store a date under the given key.

    ``value`` should be a 9-item tuple as returned by time.gmtime().  It is
    kept as a single string: the str() of every item, joined by spaces.
    The key has its spaces replaced by underscores, its type is recorded
    as ``self.DATE`` and ``cached`` is remembered alongside it.
    """
    encoded = " ".join(map(str, value))

    name = key.replace(" ", "_")
    self._value[name] = encoded
    self._type[name] = self.DATE
    self._cached[name] = cached
217 def get_as_date(self, key):
218 """Return the key as a date value."""
219 key = key.replace(" ", "_")
220 if not self.has_key(key):
223 value = self._value[key]
224 return tuple([ int(i) for i in value.split(" ") ])
def set_as_null(self, key, value, cached=1):
    """Store the null value under the given key.

    ``value`` is accepted only so the signature matches the other setters;
    it is ignored and an empty string is stored instead.  The key has its
    spaces replaced by underscores, its type is recorded as ``self.NULL``
    and ``cached`` is remembered alongside it.
    """
    name = key.replace(" ", "_")
    self._value[name] = ""
    self._type[name] = self.NULL
    self._cached[name] = cached
236 def get_as_null(self, key):
237 """Return the key as the null value."""
238 key = key.replace(" ", "_")
239 if not self.has_key(key):
244 def del_key(self, key):
245 """Delete the given key."""
246 key = key.replace(" ", "_")
247 if not self.has_key(key):
250 del(self._value[key])
252 del(self._cached[key])
255 """Return the list of cached keys."""
256 return self._value.keys()
259 """Iterate the cached keys."""
260 return iter(self._value.keys())
263 __contains__ = has_key
264 __setitem__ = set_as_string
266 __delitem__ = del_key
267 __delattr__ = del_key
269 def __setattr__(self, key, value):
270 if key.startswith("_"):
271 self.__dict__[key] = value
275 def __getattr__(self, key):
276 if self.has_key(key):
279 raise AttributeError, key
def filename(directory, filename):
    """Build a cache file path inside ``directory`` from ``filename``.

    The name is cleaned in four steps, in this order: a leading URL scheme
    prefix is removed, every run of "/" or "?" characters becomes a single
    ",", and then leading and trailing "," or "." characters are trimmed.
    The cleaned name is finally joined onto ``directory``.
    """
    substitutions = (
        (re_url_scheme, ""),
        (re_slash, ","),
        (re_initial_cruft, ""),
        (re_final_cruft, ""),
    )

    cleaned = filename
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)

    return os.path.join(directory, cleaned)
296 """Return the value as a UTF-8 string."""
297 if type(value) == type(u''):
298 return value.encode("utf-8")
301 return unicode(value, "utf-8").encode("utf-8")
304 return unicode(value, "iso-8859-1").encode("utf-8")
306 return unicode(value, "ascii", "replace").encode("utf-8")