spamassassin 中文垃圾邮件规则 (2007年01月26日 修改):
以下内容是我的 local.cf 配置文件。
注意:以下涉及到的 IP 地址(12.34.56.78)和域名(test.com.cn),请改为你的实际地址和域名;各字段之间的空白处请使用 Tab 键分隔。
----------------------------------------------------------------------------------------------
# SpamAssassin config file for version 3.x
# Total score at or above which a message is considered spam.
required_score 11.0
# Rewrite the Subject: header of suspected spam; the _SCORE_ tag is
# replaced with the score the message received.
rewrite_header subject ***SPAM(_SCORE_)***
# Encapsulate spam in an attachment (0=no, 1=yes, 2=safe).
# 0 leaves the original message body untouched and only adds headers.
report_safe 0
# The auto-whitelist (AWL) is turned off.
use_auto_whitelist 0
# Enable the Bayes system and the BAYES_* rules that use it.
use_bayes 1
use_bayes_rules 1
# Location (path prefix) of the Bayes database files.
bayes_path /var/spool/MailScanner/.spamassassin/bayes
# Enable Bayes auto-learning.
# SpamAssassin 3.x calls this option "bayes_auto_learn"; "auto_learn" was the
# 2.x name and is not a recognised 3.x setting.
bayes_auto_learn 1
# Minimum number of learned ham / spam messages before Bayes results are used.
bayes_min_ham_num 30
bayes_min_spam_num 40
# Auto-learn a message as ham when it scores below 0.0 and as spam when it
# scores above 11.0 (kept in line with required_score).
bayes_auto_learn_threshold_nonspam 0.0
bayes_auto_learn_threshold_spam 11.0
# Enable or disable network checks
# DNS blocklist (RBL) lookups stay enabled (0 = do not skip them).
skip_rbl_checks 0
# The Razor2, DCC and Pyzor checks are all turned off.
use_razor2 0
use_dcc 0
use_pyzor 0
# Declare DNS as available rather than letting SpamAssassin test for it.
dns_available yes
# Languages that are acceptable in incoming mail; mail written in a
# language not listed here may be marked as possible foreign-language spam.
# "all" accepts every language (a narrower choice for this site would be
# chinese english japanese).
ok_languages all
# Locales (character sets) that are acceptable in incoming mail; mail using
# other locales may be marked as possible foreign-language spam.
# "all" accepts every locale.
ok_locales all
# Whitelist mail relayed by a host in our own domain: matches a Received
# header fragment of the form "(<host>.test.com.cn [<anything>])".
# NOTE(review): the bracketed address is not checked, so the -100 bonus
# relies on the hostname in the Received header being trustworthy - confirm
# that outside senders cannot forge such a header.
header LOCAL_RCVD Received =~ /.*\(\S+\.test\.com\.cn\s+\[.*\]\)/
describe LOCAL_RCVD Received from local machine
score LOCAL_RCVD -100
# Disabled scores.
# A score of 0 switches a rule off. These 8-bit rules are disabled
# (presumably because legitimate Chinese mail triggers them).
# The UPPERCASE_25_50 / UPPERCASE_50_75 / UPPERCASE_75_100 entries formerly
# listed here were dead: the same rules are given non-zero scores later in
# this file, and the last "score" line for a rule is the one that applies.
score HEADER_8BITS 0
score HTML_COMMENT_8BITS 0
score SUBJ_FULL_OF_8BITS 0
# Mail whose Received header claims "from test.com.cn" but which does not
# carry our server's real IP address is treated as forged.
# Sub-rule: a Received header names our domain as the sending host.
header __FROM_TEATIME Received =~ /from test\.com\.cn/i
# Sub-rule: a Received header contains our server's real IP address.
header __FROM_TEATIME_IP Received =~ /\[12\.34\.56\.78\]/
# Fire only when the domain is claimed AND the expected IP is absent.
# (Previously the meta tested __FROM_TEATIME alone, so genuine mail from
# 12.34.56.78 was penalised as well.)
meta FROM_TEATIME_BUT_IP_ERROR (__FROM_TEATIME && !__FROM_TEATIME_IP)
describe FROM_TEATIME_BUT_IP_ERROR From test.com.cn but ip not match
score FROM_TEATIME_BUT_IP_ERROR 11.0
# Addresses like <some chinese>@mydomain: a local-part containing an 8-bit
# (non-ASCII) byte in From:, To: or Cc: cannot be a real local mailbox.
# [\x80-\xff] matches any byte with the high bit set.
header __FROM_8BIT_LOCAL From:addr =~ /[a-zA-Z0-9_-]*[\x80-\xff][a-zA-Z0-9_-]*\@test\.com\.cn/i
header __TO_8BIT_LOCAL To:addr =~ /[a-zA-Z0-9_-]*[\x80-\xff][a-zA-Z0-9_-]*\@test\.com\.cn/i
header __CC_8BIT_LOCAL Cc:addr =~ /[a-zA-Z0-9_-]*[\x80-\xff][a-zA-Z0-9_-]*\@test\.com\.cn/i
meta LOCAL_8BIT_USER (__FROM_8BIT_LOCAL || __TO_8BIT_LOCAL || __CC_8BIT_LOCAL)
# The description must stay on a single line.
describe LOCAL_8BIT_USER From or To a chinese@test.com.cn
score LOCAL_8BIT_USER 11.0
# Score overrides for stock SpamAssassin rules (address, Bayes, date,
# DNS, forgery and From: rules). The trailing comment on each line is the
# rule's description. A score of 0 disables the rule.
score ADDRESS_IN_SUBJECT 11.00 # To: address appears in Subject
score ADDR_FREE 3.000 # From Address contains FREE
score BAD_ENC_HEADER 0.500 # Message has bad MIME encoding in the header
score BANG_MORE 5.000 # Talks about more with an exclamation!
score BILLION_DOLLARS 0.500 # Talks about lots of money
score BAYES_00 0.000 # Bayesian spam probability is 0 to 1%
score BAYES_05 0.000 # Bayesian spam probability is 1 to 5%
score BAYES_20 0.000 # Bayesian spam probability is 5 to 20%
score BAYES_40 0.500 # Bayesian spam probability is 20 to 40%
score BAYES_50 1.000 # Bayesian spam probability is 40 to 60%
score BAYES_60 1.000 # Bayesian spam probability is 60 to 80%
score BAYES_80 1.000 # Bayesian spam probability is 80 to 95%
score BAYES_95 1.000 # Bayesian spam probability is 95 to 99%
score BAYES_99 1.000 # Bayesian spam probability is 99 to 100%
score DATE_IN_FUTURE_03_06 1.000 # Date: is 3 to 6 hours after Received: date
score DATE_IN_FUTURE_06_12 1.000 # Date: is 6 to 12 hours after Received: date
score DATE_IN_FUTURE_12_24 1.000 # Date: is 12 to 24 hours after Received: date
score DATE_IN_FUTURE_24_48 1.000 # Date: is 24 to 48 hours after Received: date
score DATE_IN_FUTURE_48_96 1.000 # Date: is 48 to 96 hours after Received: date
score DATE_IN_FUTURE_96_XX 1.000 # Date: is 96 hours or more after Received: date
score DATE_IN_PAST_03_06 1.000 # Date: is 3 to 6 hours before Received: date
score DATE_IN_PAST_06_12 1.000 # Date: is 6 to 12 hours before Received: date
score DATE_IN_PAST_12_24 1.000 # Date: is 12 to 24 hours before Received: date
score DATE_IN_PAST_24_48 1.000 # Date: is 24 to 48 hours before Received: date
score DATE_IN_PAST_48_96 1.000 # Date: is 48 to 96 hours before Received: date
score DATE_IN_PAST_96_XX 1.000 # Date: is 96 hours or more before Received: date
score DATE_SPAMWARE_Y2K 1.000 # Date header uses unusual Y2K formatting
score DNS_FROM_AHBL_RHSBL 0.000 # From: sender listed in dnsbl.ahbl.org
score DNS_FROM_RFC_ABUSE 0.000 # Envelope sender in abuse.rfc-ignorant.org
score DNS_FROM_RFC_BOGUSMX 0.000 # Envelope sender in bogusmx.rfc-ignorant.org
score DNS_FROM_RFC_DSN 0.000 # Envelope sender in dsn.rfc-ignorant.org
score DNS_FROM_RFC_POST 0.000 # Envelope sender in postmaster.rfc-ignorant.org
score DNS_FROM_RFC_WHOIS 0.000 # Envelope sender in whois.rfc-ignorant.org
score DNS_FROM_SECURITYSAGE 1.000 # Envelope sender in blackholes.securitysage.com
score DOMAIN_4U2 2.000 # Domain name containing a "4u" variant
score DOMAIN_RATIO 3.000 # Message body mentions many internet domains
score EMPTY_MESSAGE 1.000 # Message appears to be empty with no Subject: text
score ENGLISH_UCE_SUBJECT 1.400 # Subject contains an English UCE tag
score FORGED_HOTMAIL_RCVD 1.000 # Forged hotmail.com 'Received:' header found
score FORGED_HOTMAIL_RCVD2 1.000 # hotmail.com 'From' address, but no 'Received:'
score FORGED_MSGID_AOL 1.500 # Message-ID is forged, (aol.com)
score FORGED_MSGID_EXCITE 1.500 # Message-ID is forged, (excite.com)
score FORGED_MSGID_HOTMAIL 1.500 # Message-ID is forged, (hotmail.com)
score FORGED_MSGID_MSN 1.500 # Message-ID is forged, (msn.com)
score FORGED_MSGID_YAHOO 1.500 # Message-ID is forged, (yahoo.com)
score FORGED_MUA_AOL_FROM 1.500 # Forged mail pretending to be from AOL (by From)
score FORGED_MUA_EUDORA 1.500 # Forged mail pretending to be from Eudora
score FORGED_MUA_IMS 1.500 # Forged mail pretending to be from IMS
score FORGED_MUA_MOZILLA 1.500 # Forged mail pretending to be from Mozilla
score FORGED_MUA_OIMO 1.500 # Forged mail pretending to be from MS Outlook IMO
score FORGED_MUA_OUTLOOK 1.500 # Forged mail pretending to be from MS Outlook
score FORGED_MUA_THEBAT_BOUN 2.000 # Mail pretending to be from The Bat! (boundary)
score FORGED_MUA_THEBAT_CS 1.500 # Mail pretending to be from The Bat! (charset)
score FORGED_OUTLOOK_HTML 1.500 # Outlook can't send HTML message only
score FORGED_OUTLOOK_TAGS 1.000 # Outlook can't send HTML in this format
score FROM_ALL_NUMS 1.500 # From numeric address (except US/Canada phones)
score FROM_BLANK_NAME 1.500 # From: contains empty name
score FROM_DOMAIN_NOVOWEL 1.500 # From: domain has series of non-vowel letters
score FROM_ENDS_IN_NUMS 1.000 # From: ends in many numbers
score FROM_ILLEGAL_CHARS 1.000 # From: has too many raw illegal characters
score GAPPY_SUBJECT 3.000 # Subject: contains G.a.p.p.y-T.e.x.t
score HEAD_ILLEGAL_CHARS 0.500 # Headers have too many raw illegal characters
score HELO_DYNAMIC_IPADDR2 2.000 # Relay HELO'd using suspicious hostname (IP addr 2)
# Score overrides for the HTML_* content/structure rules and the INVALID_*
# header rules. Rules scored 0.000 are disabled.
score HTML_00_10 0.000 # Message is 0% to 10% HTML
score HTML_10_20 0.000 # Message is 10% to 20% HTML
score HTML_20_30 0.000 # Message is 20% to 30% HTML
score HTML_30_40 0.000 # Message is 30% to 40% HTML
score HTML_40_50 0.000 # Message is 40% to 50% HTML
score HTML_50_60 0.000 # Message is 50% to 60% HTML
score HTML_60_70 0.000 # Message is 60% to 70% HTML
score HTML_70_80 0.000 # Message is 70% to 80% HTML
score HTML_80_90 0.000 # Message is 80% to 90% HTML
score HTML_90_100 0.000 # Message is 90% to 100% HTML
score HTML_FONT_BIG 0.000 # HTML tag for a big font size
score HTML_FONT_FACE_BAD 0.000 # HTML font face is not a word
score HTML_IMAGE_ONLY_04 0.500 # HTML: images with 0-400 bytes of words
score HTML_IMAGE_ONLY_08 0.400 # HTML: images with 400-800 bytes of words
score HTML_IMAGE_ONLY_12 1.000 # HTML: images with 800-1200 bytes of words
score HTML_IMAGE_ONLY_16 1.000 # HTML: images with 1200-1600 bytes of words
score HTML_IMAGE_ONLY_20 1.000 # HTML: images with 1600-2000 bytes of words
score HTML_IMAGE_ONLY_24 1.000 # HTML: images with 2000-2400 bytes of words
score HTML_IMAGE_ONLY_28 1.000 # HTML: images with 2400-2800 bytes of words
score HTML_IMAGE_ONLY_32 1.000 # HTML: images with 2800-3200 bytes of words
score HTML_IMAGE_RATIO_02 2.000 # HTML has a low ratio of text to image area
score HTML_IMAGE_RATIO_04 2.000 # HTML has a low ratio of text to image area
score HTML_IMAGE_RATIO_06 2.000 # HTML has a low ratio of text to image area
score HTML_IMAGE_RATIO_08 2.000 # HTML has a low ratio of text to image area
score HTML_MESSAGE 0.000 # HTML included in message
score HTML_MIME_NO_HTML_TAG 3.000 # HTML-only message, but there is no HTML tag
score HTML_MISSING_CTYPE 3.000 # Message is HTML without HTML Content-Type
score HTML_NONELEMENT_00_10 0.000 # 0% to 10% of HTML elements are non-standard
score HTML_NONELEMENT_10_20 0.500 # 10% to 20% of HTML elements are non-standard
score HTML_NONELEMENT_20_30 0.000 # 20% to 30% of HTML elements are non-standard
score HTML_NONELEMENT_30_40 0.500 # 30% to 40% of HTML elements are non-standard
score HTML_NONELEMENT_40_50 0.000 # 40% to 50% of HTML elements are non-standard
score HTML_NONELEMENT_50_60 0.500 # 50% to 60% of HTML elements are non-standard
score HTML_NONELEMENT_60_70 0.000 # 60% to 70% of HTML elements are non-standard
score HTML_NONELEMENT_70_80 0.500 # 70% to 80% of HTML elements are non-standard
score HTML_NONELEMENT_80_90 0.000 # 80% to 90% of HTML elements are non-standard
score HTML_NONELEMENT_90_100 0.500 # 90% to 100% of HTML elements are non-standard
score HTML_OBFUSCATE_05_10 0.600 # Message is 5% to 10% HTML obfuscation
score HTML_OBFUSCATE_10_20 0.500 # Message is 10% to 20% HTML obfuscation
score HTML_OBFUSCATE_20_30 1.000 # Message is 20% to 30% HTML obfuscation
score HTML_OBFUSCATE_30_40 1.000 # Message is 30% to 40% HTML obfuscation
score HTML_OBFUSCATE_40_50 1.000 # Message is 40% to 50% HTML obfuscation
score HTML_OBFUSCATE_50_60 1.500 # Message is 50% to 60% HTML obfuscation
score HTML_OBFUSCATE_60_70 1.500 # Message is 60% to 70% HTML obfuscation
score HTML_OBFUSCATE_70_80 1.000 # Message is 70% to 80% HTML obfuscation
score HTML_OBFUSCATE_80_90 1.000 # Message is 80% to 90% HTML obfuscation
score HTML_OBFUSCATE_90_100 1.000 # Message is 90% to 100% HTML obfuscation
score HTML_SHORT_LINK_IMG_1 2.000 # HTML is very short with a linked image
score HTML_SHORT_LINK_IMG_2 2.000 # HTML is very short with a linked image
score HTML_SHORT_LINK_IMG_3 0.500 # HTML is very short with a linked image
score HTML_TAG_EXIST_BGSOUND 0.500 # HTML has "bgsound" tag
score HTML_TAG_EXIST_MARQUEE 0.500 # HTML has "marquee" tag
score HTML_TAG_EXIST_TBODY 0.500 # HTML has "tbody" tag
score HTML_TEXT_AFTER_BODY 0.500 # HTML contains text after BODY close tag
score HTML_TEXT_AFTER_HTML 0.500 # HTML contains text after HTML close tag
score INVALID_DATE 0.500 # Invalid Date: header (not RFC 2822)
score INVALID_MSGID 0.500 # Message-Id is not valid, according to RFC 2822
score INVALID_TZ_CST 0.500 # Invalid date in header (wrong CST timezone)
score INVALID_TZ_EST 0.500 # Invalid date in header (wrong EST timezone)
score INVALID_TZ_GMT 0.500 # Invalid date in header (wrong GMT/UTC timezone)
# Score overrides for MIME, Message-Id, bulk-mailer fingerprint (RATWARE_*),
# Received: and SPF rules. SPF_FAIL and SPF_HELO_FAIL are set to 11.00,
# which equals required_score, so either one alone marks a message as spam.
score MAILTO_TO_SPAM_ADDR 0.200 # Includes a link to a likely spammer email
score MIME_BASE64_NO_NAME 0.500 # base64 attachment does not have a file name
score MIME_BASE64_TEXT 0.500 # Message text disguised using base64 encoding
score MIME_HTML_ONLY 0.500 # Message only has text/html MIME parts
score MIME_HTML_ONLY_MULTI 0.000 # Multipart message only has text/html MIME parts
score MIME_HEADER_CTYPE_ONLY 0.000 # 'Content-Type' found without required MIME headers
score MISSING_MIMEOLE 0.500 # Message has X-MSMail-Priority, but no X-MimeOLE
score MISSING_SUBJECT 1.000 # Missing Subject: header
score MSGID_DOLLARS 3.000 # Message-Id has pattern used in spam
score MSGID_FROM_MTA_ID 0.500 # Message-Id for external message added locally
score MSGID_OUTLOOK_INVALID 0.500 # Message-Id is fake (in Outlook Express format)
score NO_DNS_FOR_FROM 0.500 # Envelope sender has no MX or A DNS records
score NO_REAL_NAME 1.000 # From: does not include a real name
score PLING_PLING 1.000 # Subject has lots of exclamation marks
score RATWARE_MS_HASH 3.000 # Bulk email fingerprint (msgid ms hash) found
score RATWARE_RCVD_AT 3.000 # Bulk email fingerprint (Received @) found
score RATWARE_RCVD_LC_ESMTP 1.500 # Bulk email fingerprint ('esmtp' Received) found
score RATWARE_RCVD_PF 3.000 # Bulk email fingerprint (Received PF) found
score RATWARE_ZERO_TZ 3.000 # Bulk email fingerprint (+0000) found
score RCVD_DOUBLE_IP_SPAM 0.500 # Bulk email fingerprint (double IP) found
score RCVD_HELO_IP_MISMATCH 1.000 # Received: HELO and IP do not match, but should
score RCVD_ILLEGAL_IP 1.000 # Received: contains illegal IP address
score RCVD_NUMERIC_HELO 1.000 # Received: contains an IP address used for HELO
score SPF_FAIL 11.00 # SPF: sender does not match SPF record (fail)
score SPF_HELO_FAIL 11.00 # SPF: HELO does not match SPF record (fail)
score SPF_HELO_NEUTRAL 1.000 # SPF: HELO does not match SPF record (neutral)
score SPF_HELO_PASS 0.000 # SPF: HELO matches SPF record
score SPF_HELO_SOFTFAIL 0.000 # SPF: HELO does not match SPF record (softfail)
score SPF_NEUTRAL 1.000 # SPF: sender does not match SPF record (neutral)
score SPF_PASS 0.000 # SPF: sender matches SPF record
score SPF_SOFTFAIL 0.500 # SPF: sender does not match SPF record (softfail)
# Score overrides for Subject: rules, To:, body-uppercase, URI blocklist
# and user black/white-list rules. Rules scored 0.000 are disabled.
score SUBJECT_DIET 1.812 # Subject talks about losing pounds
score SUBJECT_DRUG_GAP_C 3.000 # Subject contains a gappy version of 'cialis'
score SUBJECT_DRUG_GAP_L 3.000 # Subject contains a gappy version of 'levitra'
score SUBJECT_DRUG_GAP_P 3.000 # Subject contains a gappy version of 'phentermine'
score SUBJECT_DRUG_GAP_S 3.000 # Subject contains a gappy version of 'soma'
score SUBJECT_DRUG_GAP_VA 3.000 # Subject contains a gappy version of 'valium'
score SUBJECT_DRUG_GAP_VIC 3.000 # Subject contains a gappy version of 'vicodin'
score SUBJECT_DRUG_GAP_X 3.000 # Subject contains a gappy version of 'xanax'
score SUBJECT_ENCODED_TWICE 2.000 # Subject: MIME encoded twice
score SUBJECT_EXCESS_BASE64 0.782 # Subject: base64 encoded unnecessarily
score SUBJECT_EXCESS_QP 0.000 # Subject: quoted-printable encoded unnecessarily
score SUBJECT_FUZZY_CHEAP 3.000 # Attempt to obfuscate words in Subject:
score SUBJECT_FUZZY_MEDS 3.000 # Attempt to obfuscate words in Subject:
score SUBJECT_FUZZY_PENIS 3.000 # Attempt to obfuscate words in Subject:
score SUBJECT_FUZZY_TION 3.000 # Attempt to obfuscate words in Subject:
score SUBJECT_NOVOWEL 0.000 # Subject: has long non-vowel letter sequence
score SUBJECT_SEXUAL 2.160 # Subject indicates sexually-explicit content
score SUBJ_2_NUM_PARENS 0.952 # Subject contains common spam sign (2 numbers)
score SUBJ_ALL_CAPS 2.000 # Subject is all capitals
score SUBJ_AS_SEEN 3.000 # Subject contains "As Seen"
score SUBJ_BUY 2.000 # Subject line starts with Buy or Buying
score SUBJ_CONSONANTS 0.000 # Subject contains consecutive consonants in "word"
score SUBJ_DOLLARS 0.650 # Subject starts with dollar amount
score SUBJ_FOR_ONLY 1.500 # Subject contains "For Only"
score SUBJ_FREE_CAP 1.200 # Subject contains "FREE" in CAPS
score SUBJ_GUARANTEED 1.360 # Subject GUARANTEED
score SUBJ_HAS_SPACES 1.000 # Subject contains lots of white space
score SUBJ_HAS_UNIQ_ID 0.895 # Subject contains a unique ID
score SUBJ_ILLEGAL_CHARS 1.000 # Subject: has too many raw illegal characters
score SUBJ_LIFE_INSURANCE 11.00 # Subject includes "life insurance"
score SUBJ_YOUR_DEBT 11.00 # Subject contains "Your Bills" or similar
score SUBJ_YOUR_FAMILY 11.00 # Subject contains "Your Family"
score SUBJ_YOUR_OWN 4.000 # Subject contains "Your Own"
score SUB_FREE_OFFER 1.000 # Subject starts with "Free"
score SUB_HELLO 1.500 # Subject starts with "Hello"
score TO_EMPTY 11.00 # To: is empty
score UNPARSEABLE_RELAY 0.000 # Informational: message has unparseable relay lines
score UPPERCASE_25_50 0.500 # message body is 25-50% uppercase
score UPPERCASE_50_75 1.000 # message body is 50-75% uppercase
score UPPERCASE_75_100 1.500 # message body is 75-100% uppercase
score URIBL_AB_SURBL 0.000 # Contains an URL listed in the AB SURBL blocklist
score URIBL_JP_SURBL 0.000 # Contains an URL listed in the JP SURBL blocklist
score URIBL_OB_SURBL 0.000 # Contains an URL listed in the OB SURBL blocklist
score URIBL_PH_SURBL 0.000 # Contains an URL listed in the PH SURBL blocklist
score URIBL_SBL 0.000 # Contains an URL listed in the SBL blocklist
score URIBL_SC_SURBL 0.000 # Contains an URL listed in the SC SURBL blocklist
score URIBL_WS_SURBL 0.000 # Contains an URL listed in the WS SURBL blocklist
score URI_SCHEME_MIXED_CASE 1.500 # URI scheme has mixed uppercase and lowercase
score WEIRD_QUOTING 1.000 # Weird repeated double-quotation marks
score USER_IN_BLACKLIST 20.00 # From: address is in the user's black-list
score USER_IN_WHITELIST -80.0 # From: address is in the user's white-list