2010-10-13 Sergio Villar Senin <svillar@igalia.com>
[WebKit-https.git] / WebCore / platform / network / soup / cache / webkit / soup-cache.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * soup-cache.c
4  *
5  * Copyright (C) 2009, 2010 Igalia S.L.
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Library General Public License for more details.
16  *
17  * You should have received a copy of the GNU Library General Public License
18  * along with this library; see the file COPYING.LIB.  If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  * Boston, MA 02110-1301, USA.
21  */
22
23 /* TODO:
24  * - Need to hook the feature in the sync SoupSession.
25  * - Need more tests.
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31
32 #include "soup-cache.h"
33 #include "soup-cache-private.h"
34 #include <libsoup/soup.h>
35 #include <gio/gio.h>
36 #include <stdlib.h>
37
38 static SoupSessionFeatureInterface *webkit_soup_cache_default_feature_interface;
39 static void webkit_soup_cache_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data);
40
/* Default maximum size of the cache: 50 MiB, expressed in bytes. The
 * expansion is parenthesized so that it behaves as a single operand in
 * any expression (e.g. `x / DEFAULT_MAX_SIZE`). */
#define DEFAULT_MAX_SIZE (50 * 1024 * 1024)

/* Percentage of the total size of the cache that can be filled by a
 * single entry */
#define MAX_ENTRY_DATA_PERCENTAGE 10
45
46 typedef struct _WebKitSoupCacheEntry {
47         char *key;
48         char *filename;
49         guint freshness_lifetime;
50         gboolean must_revalidate;
51         GString *data;
52         gsize pos;
53         gsize length;
54         time_t corrected_initial_age;
55         time_t response_time;
56         gboolean writing;
57         gboolean dirty;
58         gboolean got_body;
59         gboolean being_validated;
60         SoupMessageHeaders *headers;
61         GOutputStream *stream;
62         GError *error;
63         guint hits;
64         GCancellable *cancellable;
65 } WebKitSoupCacheEntry;
66
67 struct _WebKitSoupCachePrivate {
68         char *cache_dir;
69         GHashTable *cache;
70         guint n_pending;
71         SoupSession *session;
72         WebKitSoupCacheType cache_type;
73         guint size;
74         guint max_size;
75         guint max_entry_data_size; /* Computed value. Here for performance reasons */
76         GList *lru_start;
77 };
78
79 typedef struct {
80         WebKitSoupCache *cache;
81         WebKitSoupCacheEntry *entry;
82         SoupMessage *msg;
83         gulong got_chunk_handler;
84         gulong got_body_handler;
85         gulong restarted_handler;
86 } WebKitSoupCacheWritingFixture;
87
/* Property identifiers (presumably the GObject properties of
 * WebKitSoupCache; they are installed outside this part of the file). */
enum {
        PROP_0,                 /* placeholder so that real ids start at 1 */
        PROP_CACHE_DIR,
        PROP_CACHE_TYPE
};
93
/* Returns the WebKitSoupCachePrivate area of the instance @o. */
#define WEBKIT_SOUP_CACHE_GET_PRIVATE(o) \
        (G_TYPE_INSTANCE_GET_PRIVATE ((o), WEBKIT_TYPE_SOUP_CACHE, WebKitSoupCachePrivate))
95
96 G_DEFINE_TYPE_WITH_CODE (WebKitSoupCache, webkit_soup_cache, G_TYPE_OBJECT,
97                          G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE,
98                                                 webkit_soup_cache_session_feature_init))
99
100 static gboolean webkit_soup_cache_entry_remove (WebKitSoupCache *cache, WebKitSoupCacheEntry *entry);
101 static void make_room_for_new_entry (WebKitSoupCache *cache, guint length_to_add);
102 static gboolean cache_accepts_entries_of_size (WebKitSoupCache *cache, guint length_to_add);
103
104 static WebKitSoupCacheability
105 get_cacheability (WebKitSoupCache *cache, SoupMessage *msg)
106 {
107         WebKitSoupCacheability cacheability;
108         const char *cache_control;
109
110         /* 1. The request method must be cacheable */
111         if (msg->method == SOUP_METHOD_GET)
112                 cacheability = WEBKIT_SOUP_CACHE_CACHEABLE;
113         else if (msg->method == SOUP_METHOD_HEAD ||
114                  msg->method == SOUP_METHOD_TRACE ||
115                  msg->method == SOUP_METHOD_CONNECT)
116                 return WEBKIT_SOUP_CACHE_UNCACHEABLE;
117         else
118                 return (WEBKIT_SOUP_CACHE_UNCACHEABLE | WEBKIT_SOUP_CACHE_INVALIDATES);
119
120         cache_control = soup_message_headers_get (msg->response_headers, "Cache-Control");
121         if (cache_control) {
122                 GHashTable *hash;
123                 WebKitSoupCachePrivate *priv = WEBKIT_SOUP_CACHE_GET_PRIVATE (cache);
124
125                 hash = soup_header_parse_param_list (cache_control);
126
127                 /* Shared caches MUST NOT store private resources */
128                 if (priv->cache_type == WEBKIT_SOUP_CACHE_SHARED) {
129                         if (g_hash_table_lookup_extended (hash, "private", NULL, NULL)) {
130                                 soup_header_free_param_list (hash);
131                                 return WEBKIT_SOUP_CACHE_UNCACHEABLE;
132                         }
133                 }
134
135                 /* 2. The 'no-store' cache directive does not appear in the
136                  * headers
137                  */
138                 if (g_hash_table_lookup_extended (hash, "no-store", NULL, NULL)) {
139                         soup_header_free_param_list (hash);
140                         return WEBKIT_SOUP_CACHE_UNCACHEABLE;
141                 }
142
143                 /* This does not appear in section 2.1, but I think it makes
144                  * sense to check it too?
145                  */
146                 if (g_hash_table_lookup_extended (hash, "no-cache", NULL, NULL)) {
147                         soup_header_free_param_list (hash);
148                         return WEBKIT_SOUP_CACHE_UNCACHEABLE;
149                 }
150         }
151
152         switch (msg->status_code) {
153         case SOUP_STATUS_PARTIAL_CONTENT:
154                 /* We don't cache partial responses, but they only
155                  * invalidate cached full responses if the headers
156                  * don't match.
157                  */
158                 cacheability = WEBKIT_SOUP_CACHE_UNCACHEABLE;
159                 break;
160
161         case SOUP_STATUS_NOT_MODIFIED:
162                 /* A 304 response validates an existing cache entry */
163                 cacheability = WEBKIT_SOUP_CACHE_VALIDATES;
164                 break;
165
166         case SOUP_STATUS_MULTIPLE_CHOICES:
167         case SOUP_STATUS_MOVED_PERMANENTLY:
168         case SOUP_STATUS_GONE:
169                 /* FIXME: cacheable unless indicated otherwise */
170                 cacheability = WEBKIT_SOUP_CACHE_UNCACHEABLE;
171                 break;
172
173         case SOUP_STATUS_FOUND:
174         case SOUP_STATUS_TEMPORARY_REDIRECT:
175                 /* FIXME: cacheable if explicitly indicated */
176                 cacheability = WEBKIT_SOUP_CACHE_UNCACHEABLE;
177                 break;
178
179         case SOUP_STATUS_SEE_OTHER:
180         case SOUP_STATUS_FORBIDDEN:
181         case SOUP_STATUS_NOT_FOUND:
182         case SOUP_STATUS_METHOD_NOT_ALLOWED:
183                 return (WEBKIT_SOUP_CACHE_UNCACHEABLE | WEBKIT_SOUP_CACHE_INVALIDATES);
184
185         default:
186                 /* Any 5xx status or any 4xx status not handled above
187                  * is uncacheable but doesn't break the cache.
188                  */
189                 if ((msg->status_code >= SOUP_STATUS_BAD_REQUEST &&
190                      msg->status_code <= SOUP_STATUS_FAILED_DEPENDENCY) ||
191                     msg->status_code >= SOUP_STATUS_INTERNAL_SERVER_ERROR)
192                         return WEBKIT_SOUP_CACHE_UNCACHEABLE;
193
194                 /* An unrecognized 2xx, 3xx, or 4xx response breaks
195                  * the cache.
196                  */
197                 if ((msg->status_code > SOUP_STATUS_PARTIAL_CONTENT &&
198                      msg->status_code < SOUP_STATUS_MULTIPLE_CHOICES) ||
199                     (msg->status_code > SOUP_STATUS_TEMPORARY_REDIRECT &&
200                      msg->status_code < SOUP_STATUS_INTERNAL_SERVER_ERROR))
201                         return (WEBKIT_SOUP_CACHE_UNCACHEABLE | WEBKIT_SOUP_CACHE_INVALIDATES);
202                 break;
203         }
204
205         return cacheability;
206 }
207
208 static void
209 webkit_soup_cache_entry_free (WebKitSoupCacheEntry *entry, gboolean purge)
210 {
211         if (purge) {
212                 GFile *file = g_file_new_for_path (entry->filename);
213                 g_file_delete (file, NULL, NULL);
214                 g_object_unref (file);
215         }
216
217         g_free (entry->filename);
218         entry->filename = NULL;
219         g_free (entry->key);
220         entry->key = NULL;
221
222         if (entry->headers) {
223                 soup_message_headers_free (entry->headers);
224                 entry->headers = NULL;
225         }
226
227         if (entry->data) {
228                 g_string_free (entry->data, TRUE);
229                 entry->data = NULL;
230         }
231         if (entry->error) {
232                 g_error_free (entry->error);
233                 entry->error = NULL;
234         }
235         if (entry->cancellable) {
236                 g_object_unref (entry->cancellable);
237                 entry->cancellable = NULL;
238         }
239
240         g_slice_free (WebKitSoupCacheEntry, entry);
241 }
242
243 static void
244 copy_headers (const char *name, const char *value, SoupMessageHeaders *headers)
245 {
246         soup_message_headers_append (headers, name, value);
247 }
248
249 static void
250 update_headers (const char *name, const char *value, SoupMessageHeaders *headers)
251 {
252         if (soup_message_headers_get (headers, name))
253                 soup_message_headers_replace (headers, name, value);
254         else
255                 soup_message_headers_append (headers, name, value);
256 }
257
258 static guint
259 webkit_soup_cache_entry_get_current_age (WebKitSoupCacheEntry *entry)
260 {
261         time_t now = time (NULL);
262         time_t resident_time;
263
264         resident_time = now - entry->response_time;
265         return entry->corrected_initial_age + resident_time;
266 }
267
268 static gboolean
269 webkit_soup_cache_entry_is_fresh_enough (WebKitSoupCacheEntry *entry, int min_fresh)
270 {
271         unsigned limit = (min_fresh == -1) ? webkit_soup_cache_entry_get_current_age (entry) : min_fresh;
272         return entry->freshness_lifetime > limit;
273 }
274
275 static char *
276 soup_message_get_cache_key (SoupMessage *msg)
277 {
278         SoupURI *uri = soup_message_get_uri (msg);
279         return soup_uri_to_string (uri, FALSE);
280 }
281
282 static void
283 webkit_soup_cache_entry_set_freshness (WebKitSoupCacheEntry *entry, SoupMessage *msg, WebKitSoupCache *cache)
284 {
285         const char *cache_control;
286         const char *expires, *date, *last_modified;
287         GHashTable *hash;
288
289         hash = NULL;
290
291         cache_control = soup_message_headers_get (entry->headers, "Cache-Control");
292         if (cache_control) {
293                 const char *max_age, *s_maxage;
294                 gint64 freshness_lifetime = 0;
295                 WebKitSoupCachePrivate *priv = WEBKIT_SOUP_CACHE_GET_PRIVATE (cache);
296
297                 hash = soup_header_parse_param_list (cache_control);
298
299                 /* Should we re-validate the entry when it goes stale */
300                 entry->must_revalidate = (gboolean)g_hash_table_lookup (hash, "must-revalidate");
301
302                 /* Section 2.3.1 */
303                 if (priv->cache_type == WEBKIT_SOUP_CACHE_SHARED) {
304                         s_maxage = g_hash_table_lookup (hash, "s-maxage");
305                         if (s_maxage) {
306                                 freshness_lifetime = g_ascii_strtoll (s_maxage, NULL, 10);
307                                 if (freshness_lifetime) {
308                                         /* Implies proxy-revalidate. TODO: is it true? */
309                                         entry->must_revalidate = TRUE;
310                                         soup_header_free_param_list (hash);
311                                         return;
312                                 }
313                         }
314                 }
315
316                 /* If 'max-age' cache directive is present, use that */
317                 max_age = g_hash_table_lookup (hash, "max-age");
318                 if (max_age)
319                         freshness_lifetime = g_ascii_strtoll (max_age, NULL, 10);
320
321                 if (freshness_lifetime) {
322                         entry->freshness_lifetime = (guint)MIN (freshness_lifetime, G_MAXUINT32);
323                         soup_header_free_param_list (hash);
324                         return;
325                 }
326         }
327
328         if (hash != NULL)
329                 soup_header_free_param_list (hash);
330
331         /* If the 'Expires' response header is present, use its value
332          * minus the value of the 'Date' response header
333          */
334         expires = soup_message_headers_get (entry->headers, "Expires");
335         date = soup_message_headers_get (entry->headers, "Date");
336         if (expires && date) {
337                 SoupDate *expires_d, *date_d;
338                 time_t expires_t, date_t;
339
340                 expires_d = soup_date_new_from_string (expires);
341                 if (expires_d) {
342                         date_d = soup_date_new_from_string (date);
343
344                         expires_t = soup_date_to_time_t (expires_d);
345                         date_t = soup_date_to_time_t (date_d);
346
347                         soup_date_free (expires_d);
348                         soup_date_free (date_d);
349
350                         if (expires_t && date_t) {
351                                 entry->freshness_lifetime = (guint)MAX (expires_t - date_t, 0);
352                                 return;
353                         }
354                 } else {
355                         /* If Expires is not a valid date we should
356                            treat it as already expired, see section
357                            3.3 */
358                         entry->freshness_lifetime = 0;
359                         return;
360                 }
361         }
362
363         /* Otherwise an heuristic may be used */
364
365         /* Heuristics MUST NOT be used with these status codes
366            (section 2.3.1.1) */
367         if (msg->status_code != SOUP_STATUS_OK &&
368             msg->status_code != SOUP_STATUS_NON_AUTHORITATIVE &&
369             msg->status_code != SOUP_STATUS_PARTIAL_CONTENT &&
370             msg->status_code != SOUP_STATUS_MULTIPLE_CHOICES &&
371             msg->status_code != SOUP_STATUS_MOVED_PERMANENTLY &&
372             msg->status_code != SOUP_STATUS_GONE)
373                 goto expire;
374
375         /* TODO: attach warning 113 if response's current_age is more
376            than 24h (section 2.3.1.1) when using heuristics */
377
378         /* Last-Modified based heuristic */
379         last_modified = soup_message_headers_get (entry->headers, "Last-Modified");
380         if (last_modified) {
381                 SoupDate *soup_date;
382                 time_t now, last_modified_t;
383
384                 soup_date = soup_date_new_from_string (last_modified);
385                 last_modified_t = soup_date_to_time_t (soup_date);
386                 now = time (NULL);
387
388 #define HEURISTIC_FACTOR 0.1 /* From Section 2.3.1.1 */
389
390                 entry->freshness_lifetime = MAX (0, (now - last_modified_t) * HEURISTIC_FACTOR);
391                 soup_date_free (soup_date);
392         }
393
394         return;
395
396  expire:
397         /* If all else fails, make the entry expire immediately */
398         entry->freshness_lifetime = 0;
399 }
400
401 static WebKitSoupCacheEntry *
402 webkit_soup_cache_entry_new (WebKitSoupCache *cache, SoupMessage *msg, time_t request_time, time_t response_time)
403 {
404         WebKitSoupCacheEntry *entry;
405         SoupMessageHeaders *headers;
406         const char *date;
407         char *md5;
408
409         entry = g_slice_new0 (WebKitSoupCacheEntry);
410         entry->dirty = FALSE;
411         entry->writing = FALSE;
412         entry->got_body = FALSE;
413         entry->being_validated = FALSE;
414         entry->data = g_string_new (NULL);
415         entry->pos = 0;
416         entry->error = NULL;
417
418         /* key & filename */
419         entry->key = soup_message_get_cache_key (msg);
420         md5 = g_compute_checksum_for_string (G_CHECKSUM_MD5, entry->key, -1);
421         entry->filename = g_build_filename (cache->priv->cache_dir, md5, NULL);
422         g_free (md5);
423
424         /* Headers */
425         headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_RESPONSE);
426         soup_message_headers_foreach (msg->response_headers,
427                                       (SoupMessageHeadersForeachFunc)copy_headers,
428                                       headers);
429         entry->headers = headers;
430
431         /* LRU list */
432         entry->hits = 0;
433
434         /* Section 2.3.1, Freshness Lifetime */
435         webkit_soup_cache_entry_set_freshness (entry, msg, cache);
436
437         /* Section 2.3.2, Calculating Age */
438         date = soup_message_headers_get (entry->headers, "Date");
439
440         if (date) {
441                 SoupDate *soup_date;
442                 const char *age;
443                 time_t date_value, apparent_age, corrected_received_age, response_delay, age_value = 0;
444
445                 soup_date = soup_date_new_from_string (date);
446                 date_value = soup_date_to_time_t (soup_date);
447                 soup_date_free (soup_date);
448
449                 age = soup_message_headers_get (entry->headers, "Age");
450                 if (age)
451                         age_value = g_ascii_strtoll (age, NULL, 10);
452
453                 entry->response_time = response_time;
454                 apparent_age = MAX (0, entry->response_time - date_value);
455                 corrected_received_age = MAX (apparent_age, age_value);
456                 response_delay = entry->response_time - request_time;
457                 entry->corrected_initial_age = corrected_received_age + response_delay;
458         } else {
459                 /* Is this correct ? */
460                 entry->corrected_initial_age = time (NULL);
461         }
462
463         return entry;
464 }
465
466 static void
467 webkit_soup_cache_writing_fixture_free (WebKitSoupCacheWritingFixture *fixture)
468 {
469         /* Free fixture. And disconnect signals, we don't want to
470            listen to more SoupMessage events as we're finished with
471            this resource */
472         if (g_signal_handler_is_connected (fixture->msg, fixture->got_chunk_handler))
473                 g_signal_handler_disconnect (fixture->msg, fixture->got_chunk_handler);
474         if (g_signal_handler_is_connected (fixture->msg, fixture->got_body_handler))
475                 g_signal_handler_disconnect (fixture->msg, fixture->got_body_handler);
476         if (g_signal_handler_is_connected (fixture->msg, fixture->restarted_handler))
477                 g_signal_handler_disconnect (fixture->msg, fixture->restarted_handler);
478         g_object_unref (fixture->msg);
479         g_object_unref (fixture->cache);
480         g_slice_free (WebKitSoupCacheWritingFixture, fixture);
481 }
482
483 static void
484 close_ready_cb (GObject *source, GAsyncResult *result, WebKitSoupCacheWritingFixture *fixture)
485 {
486         WebKitSoupCacheEntry *entry = fixture->entry;
487         WebKitSoupCache *cache = fixture->cache;
488         GOutputStream *stream = G_OUTPUT_STREAM (source);
489         goffset content_length;
490
491         g_warn_if_fail (entry->error == NULL);
492
493         /* FIXME: what do we do on error ? */
494
495         if (stream) {
496                 g_output_stream_close_finish (stream, result, NULL);
497                 g_object_unref (stream);
498         }
499         entry->stream = NULL;
500
501         content_length = soup_message_headers_get_content_length (entry->headers);
502
503         /* If the process was cancelled, then delete the entry from
504            the cache. Do it also if the size of a chunked resource is
505            too much for the cache */
506         if (g_cancellable_is_cancelled (entry->cancellable)) {
507                 entry->dirty = FALSE;
508                 webkit_soup_cache_entry_remove (cache, entry);
509                 webkit_soup_cache_entry_free (entry, TRUE);
510                 entry = NULL;
511         } else if ((soup_message_headers_get_encoding (entry->headers) == SOUP_ENCODING_CHUNKED) ||
512                    entry->length != content_length) {
513                 /** Two options here:
514                  *
515                  * 1. "chunked" data, entry was temporarily added to
516                  * cache (as content-length is 0) and now that we have
517                  * the actual size we have to evaluate if we want it
518                  * in the cache or not
519                  *
520                  * 2. Content-Length has a different value than actual
521                  * length, means that the content was encoded for
522                  * transmission (typically compressed) and thus we
523                  * have to substract the content-length value that was
524                  * added to the cache and add the unencoded length
525                  **/
526                 gint length_to_add = entry->length - content_length;
527
528                 /* Make room in cache if needed */
529                 if (cache_accepts_entries_of_size (cache, length_to_add)) {
530                         make_room_for_new_entry (cache, length_to_add);
531
532                         cache->priv->size += length_to_add;
533                 } else {
534                         entry->dirty = FALSE;
535                         webkit_soup_cache_entry_remove (cache, entry);
536                         webkit_soup_cache_entry_free (entry, TRUE);
537                         entry = NULL;
538                 }
539         }
540
541         if (entry) {
542                 /* Get rid of the GString in memory for the resource now */
543                 if (entry->data) {
544                         g_string_free (entry->data, TRUE);
545                         entry->data = NULL;
546                 }
547
548                 entry->dirty = FALSE;
549                 entry->writing = FALSE;
550                 entry->got_body = FALSE;
551                 entry->pos = 0;
552
553                 g_object_unref (entry->cancellable);
554                 entry->cancellable = NULL;
555         }
556
557         cache->priv->n_pending--;
558
559         /* Frees */
560         webkit_soup_cache_writing_fixture_free (fixture);
561 }
562
563 static void
564 write_ready_cb (GObject *source, GAsyncResult *result, WebKitSoupCacheWritingFixture *fixture)
565 {
566         GOutputStream *stream = G_OUTPUT_STREAM (source);
567         GError *error = NULL;
568         gssize write_size;
569         WebKitSoupCacheEntry *entry = fixture->entry;
570
571         if (g_cancellable_is_cancelled (entry->cancellable)) {
572                 g_output_stream_close_async (stream,
573                                              G_PRIORITY_LOW,
574                                              entry->cancellable,
575                                              (GAsyncReadyCallback)close_ready_cb,
576                                              fixture);
577                 return;
578         }
579
580         write_size = g_output_stream_write_finish (stream, result, &error);
581         if (write_size <= 0 || error) {
582                 if (error)
583                         entry->error = error;
584                 g_output_stream_close_async (stream,
585                                              G_PRIORITY_LOW,
586                                              entry->cancellable,
587                                              (GAsyncReadyCallback)close_ready_cb,
588                                              fixture);
589                 /* FIXME: We should completely stop caching the
590                    resource at this point */
591         } else {
592                 entry->pos += write_size;
593
594                 /* Are we still writing and is there new data to write
595                    already ? */
596                 if (entry->data && entry->pos < entry->data->len) {
597                         g_output_stream_write_async (entry->stream,
598                                                      entry->data->str + entry->pos,
599                                                      entry->data->len - entry->pos,
600                                                      G_PRIORITY_LOW,
601                                                      entry->cancellable,
602                                                      (GAsyncReadyCallback)write_ready_cb,
603                                                      fixture);
604                 } else {
605                         entry->writing = FALSE;
606
607                         if (entry->got_body) {
608                                 /* If we already received 'got-body'
609                                    and we have written all the data,
610                                    we can close the stream */
611                                 g_output_stream_close_async (entry->stream,
612                                                              G_PRIORITY_LOW,
613                                                              entry->cancellable,
614                                                              (GAsyncReadyCallback)close_ready_cb,
615                                                              fixture);
616                         }
617                 }
618         }
619 }
620
621 static void
622 msg_got_chunk_cb (SoupMessage *msg, SoupBuffer *chunk, WebKitSoupCacheWritingFixture *fixture)
623 {
624         WebKitSoupCacheEntry *entry = fixture->entry;
625
626         g_return_if_fail (chunk->data && chunk->length);
627         g_return_if_fail (entry);
628
629         /* Ignore this if the writing or appending was cancelled */
630         if (!g_cancellable_is_cancelled (entry->cancellable)) {
631                 g_string_append_len (entry->data, chunk->data, chunk->length);
632                 entry->length = entry->data->len;
633
634                 if (!cache_accepts_entries_of_size (fixture->cache, entry->length)) {
635                         /* Quickly cancel the caching of the resource */
636                         g_cancellable_cancel (entry->cancellable);
637                 }
638         }
639
640         /* FIXME: remove the error check when we cancel the caching at
641            the first write error */
642         /* Only write if the entry stream is ready */
643         if (entry->writing == FALSE && entry->error == NULL && entry->stream) {
644                 GString *data = entry->data;
645                 entry->writing = TRUE;
646                 g_output_stream_write_async (entry->stream,
647                                              data->str + entry->pos,
648                                              data->len - entry->pos,
649                                              G_PRIORITY_LOW,
650                                              entry->cancellable,
651                                              (GAsyncReadyCallback)write_ready_cb,
652                                              fixture);
653         }
654 }
655
656 static void
657 msg_got_body_cb (SoupMessage *msg, WebKitSoupCacheWritingFixture *fixture)
658 {
659         WebKitSoupCacheEntry *entry = fixture->entry;
660         g_return_if_fail (entry);
661
662         entry->got_body = TRUE;
663
664         if (!entry->stream && entry->pos != entry->length)
665                 /* The stream is not ready to be written but we still
666                    have data to write, we'll write it when the stream
667                    is opened for writing */
668                 return;
669
670
671         if (entry->pos != entry->length) {
672                 /* If we still have data to write, write it,
673                    write_ready_cb will close the stream */
674                 if (entry->writing == FALSE && entry->error == NULL && entry->stream) {
675                         g_output_stream_write_async (entry->stream,
676                                                      entry->data->str + entry->pos,
677                                                      entry->data->len - entry->pos,
678                                                      G_PRIORITY_LOW,
679                                                      entry->cancellable,
680                                                      (GAsyncReadyCallback)write_ready_cb,
681                                                      fixture);
682                 }
683                 return;
684         }
685
686         if (entry->stream && !entry->writing)
687                 g_output_stream_close_async (entry->stream,
688                                              G_PRIORITY_LOW,
689                                              entry->cancellable,
690                                              (GAsyncReadyCallback)close_ready_cb,
691                                              fixture);
692 }
693
694 static gboolean
695 webkit_soup_cache_entry_remove (WebKitSoupCache *cache, WebKitSoupCacheEntry *entry)
696 {
697         GList *lru_item;
698
699         /* if (entry->dirty && !g_cancellable_is_cancelled (entry->cancellable)) { */
700         if (entry->dirty) {
701                 g_cancellable_cancel (entry->cancellable);
702                 return FALSE;
703         }
704
705         g_assert (g_list_length (cache->priv->lru_start) == g_hash_table_size (cache->priv->cache));
706
707         /* Remove from cache */
708         if (!g_hash_table_remove (cache->priv->cache, entry->key))
709                 return FALSE;
710
711         /* Remove from LRU */
712         lru_item = g_list_find (cache->priv->lru_start, entry);
713         cache->priv->lru_start = g_list_delete_link (cache->priv->lru_start, lru_item);
714
715         /* Adjust cache size */
716         cache->priv->size -= entry->length;
717
718         g_assert (g_list_length (cache->priv->lru_start) == g_hash_table_size (cache->priv->cache));
719
720         return TRUE;
721 }
722
723 static gint
724 lru_compare_func (gconstpointer a, gconstpointer b)
725 {
726         WebKitSoupCacheEntry *entry_a = (WebKitSoupCacheEntry *)a;
727         WebKitSoupCacheEntry *entry_b = (WebKitSoupCacheEntry *)b;
728
729         /** The rationale of this sorting func is
730          *
731          * 1. sort by hits -> LRU algorithm, then
732          *
733          * 2. sort by freshness lifetime, we better discard first
734          * entries that are close to expire
735          *
736          * 3. sort by size, replace first small size resources as they
737          * are cheaper to download
738          **/
739
740         /* Sort by hits */
741         if (entry_a->hits != entry_b->hits)
742                 return entry_a->hits - entry_b->hits;
743
744         /* Sort by freshness_lifetime */
745         if (entry_a->freshness_lifetime != entry_b->freshness_lifetime)
746                 return entry_a->freshness_lifetime - entry_b->freshness_lifetime;
747
748         /* Sort by size */
749         return entry_a->length - entry_b->length;
750 }
751
752 static gboolean
753 cache_accepts_entries_of_size (WebKitSoupCache *cache, guint length_to_add)
754 {
755         /* We could add here some more heuristics. TODO: review how
756            this is done by other HTTP caches */
757
758         return length_to_add <= cache->priv->max_entry_data_size;
759 }
760
761 static void
762 make_room_for_new_entry (WebKitSoupCache *cache, guint length_to_add)
763 {
764         GList *lru_entry = cache->priv->lru_start;
765
766         /* Check that there is enough room for the new entry. This is
767            an approximation as we're not working out the size of the
768            cache file or the size of the headers for performance
769            reasons. TODO: check if that would be really that expensive */
770
771         while (lru_entry &&
772                (length_to_add + cache->priv->size > cache->priv->max_size)) {
773                 WebKitSoupCacheEntry *old_entry = (WebKitSoupCacheEntry *)lru_entry->data;
774
775                 /* Discard entries. Once cancelled resources will be
776                  * freed in close_ready_cb
777                  */
778                 if (webkit_soup_cache_entry_remove (cache, old_entry)) {
779                         webkit_soup_cache_entry_free (old_entry, TRUE);
780                         lru_entry = cache->priv->lru_start;
781                 } else
782                         lru_entry = g_list_next (lru_entry);
783         }
784 }
785
786 static gboolean
787 webkit_soup_cache_entry_insert_by_key (WebKitSoupCache *cache,
788                                        const char *key,
789                                        WebKitSoupCacheEntry *entry,
790                                        gboolean sort)
791 {
792         guint length_to_add = 0;
793
794         if (soup_message_headers_get_encoding (entry->headers) != SOUP_ENCODING_CHUNKED)
795                 length_to_add = soup_message_headers_get_content_length (entry->headers);
796
797         /* Check if we are going to store the resource depending on its size */
798         if (length_to_add) {
799                 if (!cache_accepts_entries_of_size (cache, length_to_add))
800                         return FALSE;
801
802                 /* Make room for new entry if needed */
803                 make_room_for_new_entry (cache, length_to_add);
804         }
805
806         g_hash_table_insert (cache->priv->cache, g_strdup (key), entry);
807
808         /* Compute new cache size */
809         cache->priv->size += length_to_add;
810
811         /* Update LRU */
812         if (sort)
813                 cache->priv->lru_start = g_list_insert_sorted (cache->priv->lru_start, entry, lru_compare_func);
814         else
815                 cache->priv->lru_start = g_list_prepend (cache->priv->lru_start, entry);
816
817         g_assert (g_list_length (cache->priv->lru_start) == g_hash_table_size (cache->priv->cache));
818
819         return TRUE;
820 }
821
822 static void
823 msg_restarted_cb (SoupMessage *msg, WebKitSoupCacheEntry *entry)
824 {
825         /* FIXME: What should we do here exactly? */
826 }
827
828 static void
829 append_to_ready_cb (GObject *source, GAsyncResult *result, WebKitSoupCacheWritingFixture *fixture)
830 {
831         GFile *file = (GFile *)source;
832         GOutputStream *stream;
833         WebKitSoupCacheEntry *entry = fixture->entry;
834
835         stream = (GOutputStream *)g_file_append_to_finish (file, result, &entry->error);
836
837         if (g_cancellable_is_cancelled (entry->cancellable) || entry->error) {
838                 fixture->cache->priv->n_pending--;
839                 entry->dirty = FALSE;
840                 webkit_soup_cache_entry_remove (fixture->cache, entry);
841                 webkit_soup_cache_entry_free (entry, TRUE);
842                 webkit_soup_cache_writing_fixture_free (fixture);
843                 return;
844         }
845
846         entry->stream = g_object_ref (stream);
847         g_object_unref (file);
848
849         /* If we already got all the data we have to initiate the
850            writing here, since we won't get more 'got-chunk'
851            signals */
852         if (entry->got_body) {
853                 GString *data = entry->data;
854
855                 /* It could happen that reading the data from server
856                    was completed before this happens. In that case
857                    there is no data */
858                 if (data) {
859                         entry->writing = TRUE;
860                         g_output_stream_write_async (entry->stream,
861                                                      data->str + entry->pos,
862                                                      data->len - entry->pos,
863                                                      G_PRIORITY_LOW,
864                                                      entry->cancellable,
865                                                      (GAsyncReadyCallback)write_ready_cb,
866                                                      fixture);
867                 }
868         }
869 }
870
871 typedef struct {
872         time_t request_time;
873         SoupSessionFeature *feature;
874         gulong got_headers_handler;
875 } RequestHelper;
876
877 static void
878 msg_got_headers_cb (SoupMessage *msg, gpointer user_data)
879 {
880         WebKitSoupCache *cache;
881         WebKitSoupCacheability cacheable;
882         RequestHelper *helper;
883         time_t request_time, response_time;
884
885         response_time = time (NULL);
886
887         helper = (RequestHelper *)user_data;
888         cache = WEBKIT_SOUP_CACHE (helper->feature);
889         request_time = helper->request_time;
890         g_signal_handlers_disconnect_by_func (msg, msg_got_headers_cb, user_data);
891         g_slice_free (RequestHelper, helper);
892
893         cacheable = webkit_soup_cache_get_cacheability (cache, msg);
894
895         if (cacheable & WEBKIT_SOUP_CACHE_CACHEABLE) {
896                 WebKitSoupCacheEntry *entry;
897                 char *key;
898                 GFile *file;
899                 WebKitSoupCacheWritingFixture *fixture;
900
901                 /* Check if we are already caching this resource */
902                 key = soup_message_get_cache_key (msg);
903                 entry = g_hash_table_lookup (cache->priv->cache, key);
904                 g_free (key);
905
906                 if (entry && entry->dirty)
907                         return;
908
909                 /* Create a new entry, deleting any old one if present */
910                 if (entry) {
911                         webkit_soup_cache_entry_remove (cache, entry);
912                         webkit_soup_cache_entry_free (entry, TRUE);
913                 }
914
915                 entry = webkit_soup_cache_entry_new (cache, msg, request_time, response_time);
916                 entry->hits = 1;
917
918                 /* Do not continue if it can not be stored */
919                 if (!webkit_soup_cache_entry_insert_by_key (cache, (const gchar *)entry->key, entry, TRUE)) {
920                         webkit_soup_cache_entry_free (entry, TRUE);
921                         return;
922                 }
923
924                 fixture = g_slice_new0 (WebKitSoupCacheWritingFixture);
925                 fixture->cache = g_object_ref (cache);
926                 fixture->entry = entry;
927                 fixture->msg = g_object_ref (msg);
928
929                 /* We connect now to these signals and buffer the data
930                    if it comes before the file is ready for writing */
931                 fixture->got_chunk_handler =
932                         g_signal_connect (msg, "got-chunk", G_CALLBACK (msg_got_chunk_cb), fixture);
933                 fixture->got_body_handler =
934                         g_signal_connect (msg, "got-body", G_CALLBACK (msg_got_body_cb), fixture);
935                 fixture->restarted_handler =
936                         g_signal_connect (msg, "restarted", G_CALLBACK (msg_restarted_cb), entry);
937
938                 /* Prepare entry */
939                 file = g_file_new_for_path (entry->filename);
940                 cache->priv->n_pending++;
941
942                 entry->dirty = TRUE;
943                 entry->cancellable = g_cancellable_new ();
944                 g_file_append_to_async (file, 0,
945                                         G_PRIORITY_LOW, entry->cancellable,
946                                         (GAsyncReadyCallback)append_to_ready_cb,
947                                         fixture);
948         } else if (cacheable & WEBKIT_SOUP_CACHE_INVALIDATES) {
949                 char *key;
950                 WebKitSoupCacheEntry *entry;
951
952                 key = soup_message_get_cache_key (msg);
953                 entry = g_hash_table_lookup (cache->priv->cache, key);
954                 g_free (key);
955
956                 if (entry) {
957                         if (webkit_soup_cache_entry_remove (cache, entry))
958                                 webkit_soup_cache_entry_free (entry, TRUE);
959                 }
960         } else if (cacheable & WEBKIT_SOUP_CACHE_VALIDATES) {
961                 char *key;
962                 WebKitSoupCacheEntry *entry;
963
964                 key = soup_message_get_cache_key (msg);
965                 entry = g_hash_table_lookup (cache->priv->cache, key);
966                 g_free (key);
967
968                 g_return_if_fail (entry);
969
970                 entry->being_validated = FALSE;
971
972                 /* We update the headers of the existing cache item,
973                    plus its age */
974                 soup_message_headers_foreach (msg->response_headers,
975                                               (SoupMessageHeadersForeachFunc)update_headers,
976                                               entry->headers);
977                 webkit_soup_cache_entry_set_freshness (entry, msg, cache);
978         }
979 }
980
981 GInputStream *
982 webkit_soup_cache_send_response (WebKitSoupCache *cache, SoupMessage *msg)
983 {
984         char *key;
985         WebKitSoupCacheEntry *entry;
986         char *current_age;
987         GInputStream *stream = NULL;
988         GFile *file;
989
990         g_return_val_if_fail (WEBKIT_IS_SOUP_CACHE (cache), NULL);
991         g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL);
992
993         key = soup_message_get_cache_key (msg);
994         entry = g_hash_table_lookup (cache->priv->cache, key);
995         g_return_val_if_fail (entry, NULL);
996
997         /* If we are told to send a response from cache any validation
998            in course is over by now */
999         entry->being_validated = FALSE;
1000
1001         /* Headers */
1002         soup_message_headers_foreach (entry->headers,
1003                                       (SoupMessageHeadersForeachFunc)update_headers,
1004                                       msg->response_headers);
1005
1006         /* Add 'Age' header with the current age */
1007         current_age = g_strdup_printf ("%d", webkit_soup_cache_entry_get_current_age (entry));
1008         soup_message_headers_replace (msg->response_headers,
1009                                       "Age",
1010                                       current_age);
1011         g_free (current_age);
1012
1013         /* TODO: the original idea was to save reads, but current code
1014            assumes that a stream is always returned. Need to reach
1015            some agreement here. Also we have to handle the situation
1016            were the file was no longer there (for example files
1017            removed without notifying the cache */
1018         file = g_file_new_for_path (entry->filename);
1019         stream = (GInputStream *)g_file_read (file, NULL, NULL);
1020
1021         return stream;
1022 }
1023
1024 static void
1025 request_started (SoupSessionFeature *feature, SoupSession *session,
1026                  SoupMessage *msg, SoupSocket *socket)
1027 {
1028         RequestHelper *helper = g_slice_new0 (RequestHelper);
1029         helper->request_time = time (NULL);
1030         helper->feature = feature;
1031         helper->got_headers_handler = g_signal_connect (msg, "got-headers",
1032                                                         G_CALLBACK (msg_got_headers_cb),
1033                                                         helper);
1034 }
1035
1036 static void
1037 attach (SoupSessionFeature *feature, SoupSession *session)
1038 {
1039         WebKitSoupCache *cache = WEBKIT_SOUP_CACHE (feature);
1040         cache->priv->session = session;
1041
1042         webkit_soup_cache_default_feature_interface->attach (feature, session);
1043 }
1044
1045 static void
1046 webkit_soup_cache_session_feature_init (SoupSessionFeatureInterface *feature_interface,
1047                                         gpointer interface_data)
1048 {
1049         webkit_soup_cache_default_feature_interface =
1050                 g_type_default_interface_peek (SOUP_TYPE_SESSION_FEATURE);
1051
1052         feature_interface->attach = attach;
1053         feature_interface->request_started = request_started;
1054 }
1055
1056 static void
1057 webkit_soup_cache_init (WebKitSoupCache *cache)
1058 {
1059         WebKitSoupCachePrivate *priv;
1060
1061         priv = cache->priv = WEBKIT_SOUP_CACHE_GET_PRIVATE (cache);
1062
1063         priv->cache = g_hash_table_new_full (g_str_hash,
1064                                              g_str_equal,
1065                                              (GDestroyNotify)g_free,
1066                                              NULL);
1067
1068         /* LRU */
1069         priv->lru_start = NULL;
1070
1071         /* */
1072         priv->n_pending = 0;
1073
1074         /* Cache size */
1075         priv->max_size = DEFAULT_MAX_SIZE;
1076         priv->max_entry_data_size = priv->max_size / MAX_ENTRY_DATA_PERCENTAGE;
1077         priv->size = 0;
1078 }
1079
1080 static void
1081 remove_cache_item (gpointer key,
1082                    gpointer value,
1083                    WebKitSoupCache *cache)
1084 {
1085         WebKitSoupCacheEntry *entry = g_hash_table_lookup (cache->priv->cache, (const gchar *)key);
1086         if (webkit_soup_cache_entry_remove (cache, entry))
1087                 webkit_soup_cache_entry_free (entry, FALSE);
1088 }
1089
1090 static void
1091 webkit_soup_cache_finalize (GObject *object)
1092 {
1093         WebKitSoupCachePrivate *priv;
1094
1095         priv = WEBKIT_SOUP_CACHE (object)->priv;
1096
1097         g_hash_table_foreach (priv->cache, (GHFunc)remove_cache_item, object);
1098         g_hash_table_destroy (priv->cache);
1099         g_free (priv->cache_dir);
1100
1101         g_list_free (priv->lru_start);
1102         priv->lru_start = NULL;
1103
1104         G_OBJECT_CLASS (webkit_soup_cache_parent_class)->finalize (object);
1105 }
1106
1107 static void
1108 webkit_soup_cache_set_property (GObject *object, guint prop_id,
1109                                 const GValue *value, GParamSpec *pspec)
1110 {
1111         WebKitSoupCachePrivate *priv = WEBKIT_SOUP_CACHE (object)->priv;
1112
1113         switch (prop_id) {
1114         case PROP_CACHE_DIR:
1115                 priv->cache_dir = g_value_dup_string (value);
1116                 /* Create directory if it does not exist (FIXME: should we?) */
1117                 if (!g_file_test (priv->cache_dir, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_DIR))
1118                         g_mkdir_with_parents (priv->cache_dir, 0700);
1119                 break;
1120         case PROP_CACHE_TYPE:
1121                 priv->cache_type = g_value_get_enum (value);
1122                 /* TODO: clear private entries and issue a warning if moving to shared? */
1123                 break;
1124         default:
1125                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
1126                 break;
1127         }
1128 }
1129
1130 static void
1131 webkit_soup_cache_get_property (GObject *object, guint prop_id,
1132                                 GValue *value, GParamSpec *pspec)
1133 {
1134         WebKitSoupCachePrivate *priv = WEBKIT_SOUP_CACHE (object)->priv;
1135
1136         switch (prop_id) {
1137         case PROP_CACHE_DIR:
1138                 g_value_set_string (value, priv->cache_dir);
1139                 break;
1140         case PROP_CACHE_TYPE:
1141                 g_value_set_enum (value, priv->cache_type);
1142                 break;
1143         default:
1144                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
1145                 break;
1146         }
1147 }
1148
1149 static void
1150 webkit_soup_cache_constructed (GObject *object)
1151 {
1152         WebKitSoupCachePrivate *priv;
1153
1154         priv = WEBKIT_SOUP_CACHE (object)->priv;
1155
1156         if (!priv->cache_dir) {
1157                 /* Set a default cache dir, different for each user */
1158                 priv->cache_dir = g_build_filename (g_get_user_cache_dir (),
1159                                                     "httpcache",
1160                                                     NULL);
1161                 if (!g_file_test (priv->cache_dir, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_DIR))
1162                         g_mkdir_with_parents (priv->cache_dir, 0700);
1163         }
1164
1165         if (G_OBJECT_CLASS (webkit_soup_cache_parent_class)->constructed)
1166                 G_OBJECT_CLASS (webkit_soup_cache_parent_class)->constructed (object);
1167 }
1168
1169 #define WEBKIT_SOUP_CACHE_TYPE_TYPE (webkit_soup_cache_type_get_type ())
1170 static GType
1171 webkit_soup_cache_type_get_type (void)
1172 {
1173         static GType cache_type = 0;
1174
1175         static const GEnumValue cache_types[] = {
1176                 { WEBKIT_SOUP_CACHE_SINGLE_USER, "Single user cache", "user" },
1177                 { WEBKIT_SOUP_CACHE_SHARED, "Shared cache", "shared" },
1178                 { 0, NULL, NULL }
1179         };
1180
1181         if (!cache_type) {
1182                 cache_type = g_enum_register_static ("WebKitSoupCacheType", cache_types);
1183         }
1184         return cache_type;
1185 }
1186
1187 static void
1188 webkit_soup_cache_class_init (WebKitSoupCacheClass *cache_class)
1189 {
1190         GObjectClass *gobject_class = (GObjectClass *)cache_class;
1191
1192         gobject_class->finalize = webkit_soup_cache_finalize;
1193         gobject_class->constructed = webkit_soup_cache_constructed;
1194         gobject_class->set_property = webkit_soup_cache_set_property;
1195         gobject_class->get_property = webkit_soup_cache_get_property;
1196
1197         cache_class->get_cacheability = get_cacheability;
1198
1199         g_object_class_install_property (gobject_class, PROP_CACHE_DIR,
1200                                          g_param_spec_string ("cache-dir",
1201                                                               "Cache directory",
1202                                                               "The directory to store the cache files",
1203                                                               NULL,
1204                                                               G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY));
1205
1206         g_object_class_install_property (gobject_class, PROP_CACHE_TYPE,
1207                                          g_param_spec_enum ("cache-type",
1208                                                             "Cache type",
1209                                                             "Whether the cache is private or shared",
1210                                                             WEBKIT_SOUP_CACHE_TYPE_TYPE,
1211                                                             WEBKIT_SOUP_CACHE_SINGLE_USER,
1212                                                             G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY));
1213
1214         g_type_class_add_private (cache_class, sizeof (WebKitSoupCachePrivate));
1215 }
1216
1217 /**
1218  * webkit_soup_cache_new:
1219  * @cache_dir: the directory to store the cached data, or %NULL to use the default one
1220  * @cache_type: the #WebKitSoupCacheType of the cache
1221  *
1222  * Creates a new #WebKitSoupCache.
1223  *
1224  * Returns: a new #WebKitSoupCache
1225  *
1226  * Since: 2.28
1227  **/
1228 WebKitSoupCache *
1229 webkit_soup_cache_new (const char *cache_dir, WebKitSoupCacheType cache_type)
1230 {
1231         return g_object_new (WEBKIT_TYPE_SOUP_CACHE,
1232                              "cache-dir", cache_dir,
1233                              "cache-type", cache_type,
1234                              NULL);
1235 }
1236
1237 /**
1238  * webkit_soup_cache_has_response:
1239  * @cache: a #WebKitSoupCache
1240  * @msg: a #SoupMessage
1241  *
1242  * This function calculates whether the @cache object has a proper
1243  * response for the request @msg given the flags both in the request
1244  * and the cached reply and the time ellapsed since it was cached.
1245  *
1246  * Returns: whether or not the @cache has a valid response for @msg
1247  **/
1248 WebKitSoupCacheResponse
1249 webkit_soup_cache_has_response (WebKitSoupCache *cache, SoupMessage *msg)
1250 {
1251         char *key;
1252         WebKitSoupCacheEntry *entry;
1253         const char *cache_control;
1254         GHashTable *hash;
1255         gpointer value;
1256         gboolean must_revalidate;
1257         int max_age, max_stale, min_fresh;
1258         GList *lru_item, *item;
1259
1260         key = soup_message_get_cache_key (msg);
1261         entry = g_hash_table_lookup (cache->priv->cache, key);
1262
1263         /* 1. The presented Request-URI and that of stored response
1264          * match
1265          */
1266         if (!entry)
1267                 return WEBKIT_SOUP_CACHE_RESPONSE_STALE;
1268
1269         /* Increase hit count. Take sorting into account */
1270         entry->hits++;
1271         lru_item = g_list_find (cache->priv->lru_start, entry);
1272         item = lru_item;
1273         while (item->next && lru_compare_func (item->data, item->next->data) > 0)
1274                 item = g_list_next (item);
1275
1276         if (item != lru_item) {
1277                 cache->priv->lru_start = g_list_remove_link (cache->priv->lru_start, lru_item);
1278                 item = g_list_insert_sorted (item, lru_item->data, lru_compare_func);
1279                 g_list_free (lru_item);
1280         }
1281
1282         if (entry->dirty || entry->being_validated)
1283                 return WEBKIT_SOUP_CACHE_RESPONSE_STALE;
1284
1285         /* 2. The request method associated with the stored response
1286          *  allows it to be used for the presented request
1287          */
1288
1289         /* In practice this means we only return our resource for GET,
1290          * cacheability for other methods is a TODO in the RFC
1291          * (TODO: although we could return the headers for HEAD
1292          * probably).
1293          */
1294         if (msg->method != SOUP_METHOD_GET)
1295                 return WEBKIT_SOUP_CACHE_RESPONSE_STALE;
1296
1297         /* 3. Selecting request-headers nominated by the stored
1298          * response (if any) match those presented.
1299          */
1300
1301         /* TODO */
1302
1303         /* 4. The presented request and stored response are free from
1304          * directives that would prevent its use.
1305          */
1306
1307         must_revalidate = FALSE;
1308         max_age = max_stale = min_fresh = -1;
1309
1310         cache_control = soup_message_headers_get (msg->request_headers, "Cache-Control");
1311         if (cache_control) {
1312                 hash = soup_header_parse_param_list (cache_control);
1313
1314                 if (g_hash_table_lookup_extended (hash, "no-store", NULL, NULL)) {
1315                         g_hash_table_destroy (hash);
1316                         return WEBKIT_SOUP_CACHE_RESPONSE_STALE;
1317                 }
1318
1319                 if (g_hash_table_lookup_extended (hash, "no-cache", NULL, NULL)) {
1320                         entry->must_revalidate = TRUE;
1321                 }
1322
1323                 if (g_hash_table_lookup_extended (hash, "max-age", NULL, &value)) {
1324                         max_age = (int)MIN (g_ascii_strtoll (value, NULL, 10), G_MAXINT32);
1325                 }
1326
1327                 /* max-stale can have no value set, we need to use _extended */
1328                 if (g_hash_table_lookup_extended (hash, "max-stale", NULL, &value)) {
1329                         if (value)
1330                                 max_stale = (int)MIN (g_ascii_strtoll (value, NULL, 10), G_MAXINT32);
1331                         else
1332                                 max_stale = G_MAXINT32;
1333                 }
1334
1335                 value = g_hash_table_lookup (hash, "min-fresh");
1336                 if (value)
1337                         min_fresh = (int)MIN (g_ascii_strtoll (value, NULL, 10), G_MAXINT32);
1338
1339                 g_hash_table_destroy (hash);
1340
1341                 if (max_age != -1) {
1342                         guint current_age = webkit_soup_cache_entry_get_current_age (entry);
1343
1344                         /* If we are over max-age and max-stale is not
1345                            set, do not use the value from the cache
1346                            without validation */
1347                         if (max_age <= current_age && max_stale == -1)
1348                                 return WEBKIT_SOUP_CACHE_RESPONSE_NEEDS_VALIDATION;
1349                 }
1350         }
1351
1352         /* 5. The stored response is either: fresh, allowed to be
1353          * served stale or succesfully validated
1354          */
1355         /* TODO consider also proxy-revalidate & s-maxage */
1356         if (entry->must_revalidate)
1357                 return WEBKIT_SOUP_CACHE_RESPONSE_NEEDS_VALIDATION;
1358
1359         if (!webkit_soup_cache_entry_is_fresh_enough (entry, min_fresh)) {
1360                 /* Not fresh, can it be served stale? */
1361                 if (max_stale != -1) {
1362                         /* G_MAXINT32 means we accept any staleness */
1363                         if (max_stale == G_MAXINT32)
1364                                 return WEBKIT_SOUP_CACHE_RESPONSE_FRESH;
1365
1366                         if ((webkit_soup_cache_entry_get_current_age (entry) - entry->freshness_lifetime) <= max_stale)
1367                                 return WEBKIT_SOUP_CACHE_RESPONSE_FRESH;
1368                 }
1369
1370                 return WEBKIT_SOUP_CACHE_RESPONSE_NEEDS_VALIDATION;
1371         }
1372
1373         return WEBKIT_SOUP_CACHE_RESPONSE_FRESH;
1374 }
1375
1376 /**
1377  * webkit_soup_cache_get_cacheability:
1378  * @cache: a #WebKitSoupCache
1379  * @msg: a #SoupMessage
1380  *
1381  * Calculates whether the @msg can be cached or not.
1382  *
1383  * Returns: a #WebKitSoupCacheability value indicating whether the @msg can be cached or not.
1384  **/
1385 WebKitSoupCacheability
1386 webkit_soup_cache_get_cacheability (WebKitSoupCache *cache, SoupMessage *msg)
1387 {
1388         g_return_val_if_fail (WEBKIT_IS_SOUP_CACHE (cache), WEBKIT_SOUP_CACHE_UNCACHEABLE);
1389         g_return_val_if_fail (SOUP_IS_MESSAGE (msg), WEBKIT_SOUP_CACHE_UNCACHEABLE);
1390
1391         return WEBKIT_SOUP_CACHE_GET_CLASS (cache)->get_cacheability (cache, msg);
1392 }
1393
1394 static gboolean
1395 force_flush_timeout (gpointer data)
1396 {
1397         gboolean *forced = (gboolean *)data;
1398         *forced = TRUE;
1399
1400         return FALSE;
1401 }
1402
1403 /**
1404  * webkit_soup_cache_flush:
1405  * @cache: a #WebKitSoupCache
1406  * @session: the #SoupSession associated with the @cache
1407  *
1408  * This function will force all pending writes in the @cache to be
1409  * committed to disk. For doing so it will iterate the #GMainContext
1410  * associated with the @session (which can be the default one) as long
1411  * as needed.
1412  **/
1413 void
1414 webkit_soup_cache_flush (WebKitSoupCache *cache)
1415 {
1416         GMainContext *async_context;
1417         SoupSession *session;
1418         guint timeout_id;
1419         gboolean forced = FALSE;
1420
1421         g_return_if_fail (WEBKIT_IS_SOUP_CACHE (cache));
1422
1423         session = cache->priv->session;
1424         g_return_if_fail (SOUP_IS_SESSION (session));
1425         async_context = soup_session_get_async_context (session);
1426
1427         /* We give cache 10 secs to finish */
1428         timeout_id = g_timeout_add (10000, force_flush_timeout, &forced);
1429
1430         while (!forced && cache->priv->n_pending > 0)
1431                 g_main_context_iteration (async_context, FALSE);
1432
1433         if (!forced)
1434                 g_source_remove (timeout_id);
1435         else
1436                 g_warning ("Cache flush finished despite %d pending requests", cache->priv->n_pending);
1437 }
1438
1439 static void
1440 clear_cache_item (gpointer key,
1441                   gpointer value,
1442                   WebKitSoupCache *cache)
1443 {
1444         WebKitSoupCacheEntry *entry = g_hash_table_lookup (cache->priv->cache, (const gchar *)key);
1445         if (webkit_soup_cache_entry_remove (cache, entry))
1446                 webkit_soup_cache_entry_free (entry, TRUE);
1447 }
1448
1449 /**
1450  * webkit_soup_cache_clear:
1451  * @cache: a #WebKitSoupCache
1452  *
1453  * Will remove all entries in the @cache plus all the cache files
1454  * associated with them.
1455  **/
1456 void
1457 webkit_soup_cache_clear (WebKitSoupCache *cache)
1458 {
1459         GHashTable *hash;
1460
1461         g_return_if_fail (WEBKIT_IS_SOUP_CACHE (cache));
1462
1463         hash = cache->priv->cache;
1464         g_return_if_fail (hash);
1465
1466         g_hash_table_foreach (hash, (GHFunc)clear_cache_item, cache);
1467 }
1468
1469 SoupMessage *
1470 webkit_soup_cache_generate_conditional_request (WebKitSoupCache *cache, SoupMessage *original)
1471 {
1472         SoupMessage *msg;
1473         SoupURI *uri;
1474         WebKitSoupCacheEntry *entry;
1475         char *key;
1476         const char *value;
1477
1478         g_return_val_if_fail (WEBKIT_IS_SOUP_CACHE (cache), NULL);
1479         g_return_val_if_fail (SOUP_IS_MESSAGE (original), NULL);
1480
1481         /* First copy the data we need from the original message */
1482         uri = soup_message_get_uri (original);
1483         msg = soup_message_new_from_uri (original->method, uri);
1484
1485         soup_message_headers_foreach (original->request_headers,
1486                                       (SoupMessageHeadersForeachFunc)copy_headers,
1487                                       msg->request_headers);
1488
1489         /* Now add the validator entries in the header from the cached
1490            data */
1491         key = soup_message_get_cache_key (original);
1492         entry = g_hash_table_lookup (cache->priv->cache, key);
1493         g_free (key);
1494
1495         g_return_val_if_fail (entry, NULL);
1496
1497         entry->being_validated = TRUE;
1498
1499         value = soup_message_headers_get (entry->headers, "Last-Modified");
1500         if (value)
1501                 soup_message_headers_append (msg->request_headers,
1502                                              "If-Modified-Since",
1503                                              value);
1504         value = soup_message_headers_get (entry->headers, "ETag");
1505         if (value)
1506                 soup_message_headers_append (msg->request_headers,
1507                                              "If-None-Match",
1508                                              value);
1509         return msg;
1510 }
1511
/* Name of the file, placed inside the cache directory, that holds the
   serialized cache entries */
#define WEBKIT_SOUP_CACHE_FILE "soup.cache"

/* A single header: a dictionary entry made of two strings */
#define WEBKIT_SOUP_CACHE_HEADERS_FORMAT "{ss}"

/* A single cache entry. The fields appear in the order in which
   pack_entry() writes them */
#define WEBKIT_SOUP_CACHE_PHEADERS_FORMAT \
        "("     \
        "s"     /* key */ \
        "s"     /* filename */ \
        "b"     /* must_revalidate */ \
        "u"     /* freshness_lifetime */ \
        "u"     /* corrected_initial_age */ \
        "u"     /* response_time */ \
        "u"     /* hits */ \
        "u"     /* length */ \
        "a" WEBKIT_SOUP_CACHE_HEADERS_FORMAT \
        ")"

/* The whole file: an array of cache entries */
#define WEBKIT_SOUP_CACHE_ENTRIES_FORMAT "a" WEBKIT_SOUP_CACHE_PHEADERS_FORMAT

/* Same layout as the headers format above, except that the strings
   are prefixed with &. This way the GVariant returns a pointer to the
   data instead of duplicating the string */
#define WEBKIT_SOUP_CACHE_DECODE_HEADERS_FORMAT "{&s&s}"
1522
1523 static void
1524 pack_entry (gpointer data,
1525             gpointer user_data)
1526 {
1527         WebKitSoupCacheEntry *entry = (WebKitSoupCacheEntry *) data;
1528         SoupMessageHeadersIter iter;
1529         const gchar *header_key, *header_value;
1530         GVariantBuilder *headers_builder;
1531         GVariantBuilder *entries_builder = (GVariantBuilder *)user_data;
1532
1533         /* Do not store non-consolidated entries */
1534         if (entry->dirty || entry->writing || !entry->key)
1535                 return;
1536
1537         /* Pack headers */
1538         headers_builder = g_variant_builder_new (G_VARIANT_TYPE_ARRAY);
1539         soup_message_headers_iter_init (&iter, entry->headers);
1540         while (soup_message_headers_iter_next (&iter, &header_key, &header_value)) {
1541                 if (g_utf8_validate (header_value, -1, NULL))
1542                         g_variant_builder_add (headers_builder, WEBKIT_SOUP_CACHE_HEADERS_FORMAT,
1543                                                header_key, header_value);
1544         }
1545
1546         /* Entry data */
1547         g_variant_builder_add (entries_builder, WEBKIT_SOUP_CACHE_PHEADERS_FORMAT,
1548                                entry->key, entry->filename, entry->must_revalidate,
1549                                entry->freshness_lifetime, entry->corrected_initial_age,
1550                                entry->response_time, entry->hits, entry->length, headers_builder);
1551
1552         g_variant_builder_unref (headers_builder);
1553 }
1554
1555 void
1556 webkit_soup_cache_dump (WebKitSoupCache *cache)
1557 {
1558         WebKitSoupCachePrivate *priv = WEBKIT_SOUP_CACHE_GET_PRIVATE (cache);
1559         gchar *filename;
1560         GVariantBuilder *entries_builder;
1561         GVariant *cache_variant;
1562
1563         if (!g_list_length (cache->priv->lru_start))
1564                 return;
1565
1566         /* Create the builder and iterate over all entries */
1567         entries_builder = g_variant_builder_new (G_VARIANT_TYPE_ARRAY);
1568         g_list_foreach (cache->priv->lru_start, pack_entry, entries_builder);
1569
1570         /* Serialize and dump */
1571         cache_variant = g_variant_new (WEBKIT_SOUP_CACHE_ENTRIES_FORMAT, entries_builder);
1572         g_variant_builder_unref (entries_builder);
1573
1574         filename = g_build_filename (priv->cache_dir, WEBKIT_SOUP_CACHE_FILE, NULL);
1575         g_file_set_contents (filename, (const gchar *)g_variant_get_data (cache_variant),
1576                              g_variant_get_size (cache_variant), NULL);
1577         g_free (filename);
1578         g_variant_unref (cache_variant);
1579 }
1580
1581 void
1582 webkit_soup_cache_load (WebKitSoupCache *cache)
1583 {
1584         gchar *filename = NULL, *contents = NULL;
1585         GVariant *cache_variant;
1586         GVariantIter *entries_iter, *headers_iter;
1587         GVariantType *variant_format;
1588         gsize length;
1589         WebKitSoupCacheEntry *entry;
1590         WebKitSoupCachePrivate *priv = cache->priv;
1591
1592         filename = g_build_filename (priv->cache_dir, WEBKIT_SOUP_CACHE_FILE, NULL);
1593         if (!g_file_get_contents (filename, &contents, &length, NULL)) {
1594                 g_free (filename);
1595                 return;
1596         }
1597         g_free (filename);
1598
1599         variant_format = g_variant_type_new (WEBKIT_SOUP_CACHE_ENTRIES_FORMAT);
1600         cache_variant = g_variant_new_from_data (variant_format, (const gchar *)contents, length, FALSE, NULL, NULL);
1601         g_variant_type_free (variant_format);
1602
1603         g_variant_get (cache_variant, WEBKIT_SOUP_CACHE_ENTRIES_FORMAT, &entries_iter);
1604         entry = g_slice_new0 (WebKitSoupCacheEntry);
1605
1606         while (g_variant_iter_loop (entries_iter, WEBKIT_SOUP_CACHE_PHEADERS_FORMAT,
1607                                     &entry->key, &entry->filename, &entry->must_revalidate,
1608                                     &entry->freshness_lifetime, &entry->corrected_initial_age,
1609                                     &entry->response_time, &entry->hits, &entry->length,
1610                                     &headers_iter)) {
1611                 const gchar *header_key, *header_value;
1612
1613                 /* SoupMessage Headers */
1614                 entry->headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_RESPONSE);
1615                 while (g_variant_iter_loop (headers_iter, WEBKIT_SOUP_CACHE_DECODE_HEADERS_FORMAT, &header_key, &header_value))
1616                         soup_message_headers_append (entry->headers, header_key, header_value);
1617
1618                 /* Insert in cache */
1619                 if (!webkit_soup_cache_entry_insert_by_key (cache, (const gchar *)entry->key, entry, FALSE))
1620                         webkit_soup_cache_entry_free (entry, TRUE);
1621
1622                 /* New entry for the next iteration. This creates an
1623                    extra object the last iteration but it's worth it
1624                    as we save several if's */
1625                 entry = g_slice_new0 (WebKitSoupCacheEntry);
1626         }
1627         /* Remove last created entry */
1628         g_slice_free (WebKitSoupCacheEntry, entry);
1629
1630         /* Sort LRU (shouldn't be needed). First reverse as elements
1631          * are always prepended when inserting
1632          */
1633         cache->priv->lru_start = g_list_reverse (cache->priv->lru_start);
1634         cache->priv->lru_start = g_list_sort (cache->priv->lru_start, lru_compare_func);
1635
1636         /* frees */
1637         g_variant_iter_free (entries_iter);
1638         g_variant_unref (cache_variant);
1639 }
1640
1641 void
1642 webkit_soup_cache_set_max_size (WebKitSoupCache *cache,
1643                                 guint max_size)
1644 {
1645         cache->priv->max_size = max_size;
1646         cache->priv->max_entry_data_size = cache->priv->max_size / MAX_ENTRY_DATA_PERCENTAGE;
1647 }
1648
1649 guint
1650 webkit_soup_cache_get_max_size (WebKitSoupCache *cache)
1651 {
1652         return cache->priv->max_size;
1653 }