# SpamAssassin rules file: HTML tests
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use /etc/mail/spamassassin/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################
# From this line on, the file requires SpamAssassin 3.4.6. The version is
# written as major, then minor and patch as three digits each (3.004006).
require_version 3.004006
# HTML parser tests
#
# please sort these by eval type then name
# Three variants of the same idea: an __HTML_LENGTH_* sub-rule combined with
# __HTML_LINK_IMAGE. The sub-rules are not defined in this part of the file;
# judging by their names they presumably cover successive HTML length ranges.
meta HTML_SHORT_LINK_IMG_1 (__HTML_LENGTH_0000_1024 && __HTML_LINK_IMAGE)
describe HTML_SHORT_LINK_IMG_1 HTML is very short with a linked image
meta HTML_SHORT_LINK_IMG_2 (__HTML_LENGTH_1024_1536 && __HTML_LINK_IMAGE)
describe HTML_SHORT_LINK_IMG_2 HTML is very short with a linked image
meta HTML_SHORT_LINK_IMG_3 (__HTML_LENGTH_1536_2048 && __HTML_LINK_IMAGE)
describe HTML_SHORT_LINK_IMG_3 HTML is very short with a linked image
# Hits only when both __HTML_LENGTH_384 and __TAG_EXISTS_CENTER hit
# (neither sub-rule is defined in this part of the file).
meta HTML_SHORT_CENTER __HTML_LENGTH_384 && __TAG_EXISTS_CENTER
describe HTML_SHORT_CENTER HTML is very short with CENTER tag
# Hits when __HTML_TITLE_SUBJ_DIFF hits and __MIME_ATTACHMENT does not.
# NOTE(review): no describe line for this rule is visible here - confirm
# whether one exists elsewhere or should be added.
meta HTML_TITLE_SUBJ_DIFF (__HTML_TITLE_SUBJ_DIFF && !__MIME_ATTACHMENT)
# Requires both __HTML_CHARSET_FARAWAY and __HIGHBITS; __HIGHBITS is the body
# rule defined further down in this file.
meta HTML_CHARSET_FARAWAY __HTML_CHARSET_FARAWAY && __HIGHBITS
describe HTML_CHARSET_FARAWAY A foreign language charset used in HTML markup
# userconf: the test needs user configuration before it can be used.
tflags HTML_CHARSET_FARAWAY userconf
# Hits when MIME_HTML_ONLY hits but __TAG_EXISTS_HTML does not.
meta HTML_MIME_NO_HTML_TAG (MIME_HTML_ONLY && !__TAG_EXISTS_HTML)
describe HTML_MIME_NO_HTML_TAG HTML-only message, but there is no HTML tag
# Hits when HTML_MESSAGE hits although __MIME_HTML does not.
meta HTML_MISSING_CTYPE !__MIME_HTML && HTML_MESSAGE
describe HTML_MISSING_CTYPE Message is HTML without HTML Content-Type
###########################################################################
# rawbody HTML tests
# Case-insensitive match on a single tag: after "<", 1-1000 non-">" characters,
# then "onMouseOver=", another 1-1000 non-">" characters, then "window.status=".
# Excluding ">" keeps the whole match inside one tag, and the {1,1000} bounds
# cap how far the regex scans.
rawbody HIDE_WIN_STATUS /<[^>]{1,1000}onMouseOver=[^>]{1,1000}window\.status=/i
describe HIDE_WIN_STATUS Javascript to hide URLs in browser
# Text split apart by one or more "<!...>" markup declarations/comments.
#   _A: a word character immediately before and after the run of "<!...>" tags.
#   _B: looser - any non-space character other than ">" before the run, and any
#       non-space character other than "<" after it.
# The "<![^>" at the start of the group is required: without it the group
# degenerates to "]*>" and the rules match any bare ">" between two characters.
rawbody __OBFUSCATING_COMMENT_A /\w(?:<![^>]*>)+\w/
rawbody __OBFUSCATING_COMMENT_B /[^\s>](?:<![^>]*>)+[^\s<]/
# The meta references HTML_MESSAGE and MIME_HTML_ONLY, so it is only defined
# when both the HTMLEval and MIMEEval plugins are loaded.
ifplugin Mail::SpamAssassin::Plugin::HTMLEval
ifplugin Mail::SpamAssassin::Plugin::MIMEEval
# Strict pattern in any HTML message, or loose pattern in an HTML-only message;
# suppressed whenever __ISO_2022_JP_DELIM hits.
meta OBFUSCATING_COMMENT ((__OBFUSCATING_COMMENT_A && HTML_MESSAGE) || (__OBFUSCATING_COMMENT_B && MIME_HTML_ONLY)) && !__ISO_2022_JP_DELIM
describe OBFUSCATING_COMMENT HTML comments which obfuscate text
endif
endif
# spams that are assembled from a Javascript array
# look for the XOR op: the pattern is String.fromCharCode( <name>[<index>] ^
# with optional whitespace between the tokens
rawbody __JS_FROMCHARCODE /String\.fromCharCode\s*\(\s*\S+\s*\[\s*\S+\s*\]\s*\^/
# any occurrence of document.write in the raw body
rawbody __JS_DOCWRITE /document\.write/
# both sub-rules above must hit
meta JS_FROMCHARCODE (__JS_FROMCHARCODE && __JS_DOCWRITE)
describe JS_FROMCHARCODE Document is built from a Javascript charcode array
# a good possible rule that may resurface
# ! $ % ' ( ) , - . / : ; = ? @ _
#rawbody ENTITY_DEC_OTHER /\&\#0*(?:3[3679]|4[014567]|5[89]|6[134]|95)\;/
#describe ENTITY_DEC_OTHER HTML contains needlessly encoded punctuation
# Four consecutive groups, each a character in the range \x80-\xff optionally
# followed by one more character of any kind.
body __HIGHBITS /(?:[\x80-\xff].?){4}/
# note: __HIGHBITS is used by HTML_CHARSET_FARAWAY
###########################################################################
ifplugin Mail::SpamAssassin::Plugin::HTMLEval
# HTML control test, HTML spam rules should all have better S/O than this
# Evaluated by the html_test eval function with the argument 'html'.
body HTML_MESSAGE eval:html_test('html')
describe HTML_MESSAGE HTML included in message
# HTML comment tests
# Evaluated by html_text_match with 'comment' as the first argument and a
# pattern as the second.
# NOTE(review): the pattern argument below is an empty string. It looks like
# a "<...>" pattern was stripped from this copy of the file - restore it from
# a pristine source before relying on this rule.
body HTML_COMMENT_SHORT eval:html_text_match('comment', '')
describe HTML_COMMENT_SHORT HTML comment is very short
body HTML_COMMENT_SAVED_URL eval:html_text_match('comment', '