Fix -Wreturn-type and -Wunused-parameter warnings, January 2022 edition
[WebKit-https.git] / tests / test_sanitize.py
1 # adapted from http://www.iamcal.com/publish/articles/php/processing_html_part_2/
2 # and from http://feedparser.org/tests/wellformed/sanitize/
3 # by Aaron Swartz, 2006, public domain
4
5 import unittest, new
6 from planet import sanitize
7
class SanitizeTest(unittest.TestCase):
    """Container for the generated sanitizer tests.

    The class starts out empty; every call to HTML() attaches one numbered
    ``test_<n>`` method to it.
    """


# Running count of registered cases; also used to number the test methods.
testcases = 0


def HTML(a, b):
    """Register a test asserting that ``sanitize.HTML(a)`` equals ``b``.

    a -- input markup handed to sanitize.HTML
    b -- expected result

    Each call adds a new method named ``test_<n>`` to SanitizeTest, where
    ``n`` is the 1-based position of the call in this module.
    """
    global testcases
    testcases += 1
    name = "test_%d" % testcases

    def check(self):
        self.assertEqual(sanitize.HTML(a), b)

    # Name the function after the test so tracebacks and reports show
    # "test_<n>" instead of an anonymous "<lambda>".
    check.__name__ = name

    # A plain function stored on a class is turned into a method by normal
    # attribute lookup, so the deprecated ``new.instancemethod`` wrapper
    # (the ``new`` module no longer exists in Python 3) is unnecessary.
    setattr(SanitizeTest, name, check)
18
# (input, expected output) pairs, registered in order so that the generated
# test numbers stay stable.
for raw, expected in [
    ## basics
    ("", ""),
    ("hello", "hello"),

    ## balancing tags
    ("<b>hello", "<b>hello</b>"),
    ("hello<b>", "hello<b></b>"),
    ("hello</b>", "hello"),
    ("hello<b/>", "hello<b></b>"),
    ("<b><b><b>hello", "<b><b><b>hello</b></b></b>"),
    ("</b><b>", "<b></b>"),

    ## trailing slashes
    ('<img>', '<img />'),
    ('<img/>', '<img />'),
    ('<b/></b>', '<b></b>'),

    ## balancing angle brackets
    ('<img src="foo"', ''),
    ('b>', 'b>'),
    ('<img src="foo"/', ''),
    ('>', '>'),
    ('foo<b', 'foo'),
    ('b>foo', 'b>foo'),
    ('><b', '>'),
    ('b><', 'b>'),
    ('><b>', '><b></b>'),

    ## attributes
    ('<img src=foo>', '<img src="foo" />'),
    ('<img asrc=foo>', '<img />'),
    ('<img src=test test>', '<img src="test" />'),
]:
    HTML(raw, expected)
51
## dangerous tags (a small sample)
def sHTML(x):
    """Register a case whose expected output is 'safe <b>description</b>'."""
    HTML(x, 'safe <b>description</b>')

sHTML('safe<applet code="foo.class" codebase="http://example.com/"></applet> <b>description</b>')
sHTML('<notinventedyet>safe</notinventedyet> <b>description</b>')
sHTML('<blink>safe</blink> <b>description</b>')
sHTML('safe<embed src="http://example.com/"> <b>description</b>')
sHTML('safe<frameset rows="*"><frame src="http://example.com/"></frameset> <b>description</b>')
sHTML('safe<iframe src="http://example.com/"> <b>description</b></iframe>')
sHTML('safe<link rel="stylesheet" type="text/css" href="http://example.com/evil.css"> <b>description</b>')
sHTML('safe<meta http-equiv="Refresh" content="0; URL=http://example.com/"> <b>description</b>')
sHTML('safe<object classid="clsid:C932BA85-4374-101B-A56C-00AA003668DC"> <b>description</b>')
sHTML('safe<script type="text/javascript">location.href=\'http:/\'+\'/example.com/\';</script> <b>description</b>')

# Each inline event-handler attribute is expected to be dropped while the
# src attribute is kept.  The list previously contained 'resize', which is
# not an event-handler attribute name; the handler is spelled 'onresize'.
for x in ['onabort', 'onblur', 'onchange', 'onclick', 'ondblclick', 'onerror', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onmousedown', 'onmouseout', 'onmouseover', 'onmouseup', 'onreset', 'onresize', 'onsubmit', 'onunload']:
    HTML('<img src="http://www.ragingplatypus.com/i/cam-full.jpg" %s="location.href=\'http://www.ragingplatypus.com/\';" />' % x,
         '<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />')

# The style attribute is expected to be removed while href is kept.
HTML('<a href="http://www.ragingplatypus.com/" style="display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;">never trust your upstream platypus</a>', '<a href="http://www.ragingplatypus.com/">never trust your upstream platypus</a>')
70
# (input, expected output) pairs, registered in order so that the generated
# test numbers stay stable.
for raw, expected in [
    ## ignorables
    ('foo<style>bar', 'foo'),
    ('foo<style>bar</style>', 'foo'),

    ## non-allowed tags
    ('<script>', ''),
    ('<script', ''),
    ('<script/>', ''),
    ('</script>', ''),
    ('<script woo=yay>', ''),
    ('<script woo="yay">', ''),
    ('<script woo="yay>', ''),
    ('<script woo="yay<b>', ''),
    ('<script<script>>', ''),
    ('<<script>script<script>>', ''),
    ('<<script><script>>', ''),
    ('<<script>script>>', ''),
    ('<<script<script>>', ''),

    ## bad protocols
    ('<a href="http://foo">bar</a>', '<a href="http://foo">bar</a>'),
    ('<a href="ftp://foo">bar</a>', '<a href="ftp://foo">bar</a>'),
    ('<a href="mailto:foo">bar</a>', '<a href="mailto:foo">bar</a>'),

    # not yet supported:
    #('<a href="javascript:foo">bar</a>', '<a href="#foo">bar</a>'),
    #('<a href="java script:foo">bar</a>', '<a href="#foo">bar</a>'),
    #('<a href="java\tscript:foo">bar</a>', '<a href="#foo">bar</a>'),
    #('<a href="java\nscript:foo">bar</a>', '<a href="#foo">bar</a>'),
    #('<a href="java'+chr(1)+'script:foo">bar</a>', '<a href="#foo">bar</a>'),
    #('<a href="jscript:foo">bar</a>', '<a href="#foo">bar</a>'),
    #('<a href="vbscript:foo">bar</a>', '<a href="#foo">bar</a>'),
    #('<a href="view-source:foo">bar</a>', '<a href="#foo">bar</a>'),

    ## auto closers
    ('<img src="a">', '<img src="a" />'),
    ('<img src="a">foo</img>', '<img src="a" />foo'),
    ('</img>', ''),
]:
    HTML(raw, expected)
109
## crazy: http://alpha-geek.com/example/crazy_html2.html
# First argument is the full page source, second is the expected output.
HTML(
    '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\r\n\r\n<html xmlns="http://www.w3.org/1999/xhtml">\r\n<head>\r\n<title>Crazy HTML -- Can Your Regex Parse This?</title>\r\n</head>\r\n<body    notRealAttribute="value"onload="executeMe();"foo="bar"\r\n\r\n>\r\n<!-- <script> -->\r\n\r\n<!-- \r\n\t<script> \r\n-->\r\n\r\n</script>\r\n\r\n\r\n<script\r\n\r\n\r\n>\r\n\r\nfunction executeMe()\r\n{\r\n\r\n\r\n\r\n\r\n/* <script> \r\nfunction am_i_javascript()\r\n{\r\n\tvar str = "Some innocuously commented out stuff";\r\n}\r\n< /script>\r\n*/\r\n\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\talert("Executed");\r\n}\r\n\r\n                                   </script\r\n\r\n\r\n\r\n>\r\n<h1>Did The Javascript Execute?</h1>\r\n<div notRealAttribute="value\r\n"onmouseover="\r\nexecuteMe();\r\n"foo="bar">\r\nI will execute here, too, if you mouse over me\r\n</div>\r\nThis is to keep you guys honest...<br />\r\nI like ontonology.  I like to script ontology.  Though, script>style>this.\r\n</body>\r\n</html>',
    'Crazy HTML -- Can Your Regex Parse This?\n\n\n<!-- <script> -->\n\n<!-- \n\t<script> \n-->\n\n\n\nfunction executeMe()\n{\n\n\n\n\n/* \n<h1>Did The Javascript Execute?</h1>\n<div>\nI will execute here, too, if you mouse over me\n</div>\nThis is to keep you guys honest...<br />\nI like ontonology.  I like to script ontology.  Though, script>style>this.'
)
112
# (input, expected output) pairs, registered in order so that the generated
# test numbers stay stable.
for raw, expected in [
    # valid entity references
    ("&nbsp;", "&nbsp;"),
    ("&#160;", "&#160;"),
    ("&#xa0;", "&#xa0;"),
    ("&#xA0;", "&#xA0;"),

    # unescaped ampersands
    ("AT&T", "AT&amp;T"),
    ("http://example.org?a=1&b=2", "http://example.org?a=1&amp;b=2"),

    # quote characters
    ('<a title="&#34;">quote</a>', '<a title="&#34;">quote</a>'),
    ('<a title="&#39;">quote</a>', '<a title="&#39;">quote</a>'),
]:
    HTML(raw, expected)