发新话题
打印

spamassassin 中文垃圾邮件规则 (2007年01月26日 修改)

spamassassin 中文垃圾邮件规则 (2007年01月26日 修改)

spamassassin 中文垃圾邮件规则 (2007年01月26日 修改):

以下内容是我的 local.cf 配置文件。

注意:以下配置中出现的 IP 地址(12.34.56.78)和域名(test.com.cn)请改为你自己的实际地址和域名;各字段之间的空白请使用 Tab 键分隔。
----------------------------------------------------------------------------------------------

# SpamAssassin config file for version 3.x

# Score a message must reach before it is considered spam.
required_score   11.0

# Prefix the Subject of suspected spam; _SCORE_ expands to the message's score.
rewrite_header   subject   ***SPAM(_SCORE_)***

# Do not encapsulate spam in an attachment (0=no, 1=yes, 2=safe).
report_safe      0

# Bayes classifier settings.
# The auto-whitelist is switched off; Bayes and its BAYES_* rules are switched
# on, with the database kept under the MailScanner spool directory.
use_auto_whitelist                   0
use_bayes                            1
use_bayes_rules                      1
bayes_path                           /var/spool/MailScanner/.spamassassin/bayes

# Enable Bayes auto-learning.
# "bayes_auto_learn" is the option name documented for SpamAssassin 3.x. The
# bare "auto_learn" spelling used here previously is the legacy 2.5x name and
# is not guaranteed to be accepted by every 3.x release.
bayes_auto_learn                     1

# Minimum number of learned ham / spam messages before Bayes results are used.
bayes_min_ham_num                    30
bayes_min_spam_num                   40

# Messages scoring below the first value are auto-learned as ham, messages
# scoring above the second value are auto-learned as spam.
bayes_auto_learn_threshold_nonspam   0.0
bayes_auto_learn_threshold_spam      11.0

# Network checks.
# DNS is declared available and DNS blocklist lookups are left on
# (skip_rbl_checks 0); the Razor2, DCC and Pyzor checks are turned off.
dns_available     yes
skip_rbl_checks   0
use_razor2        0
use_dcc           0
use_pyzor         0

# Accept mail written in any language, so that no message (e.g. Chinese,
# English, Japanese) is marked as possibly being spam in a foreign language.
ok_languages   all

# Likewise accept mail using any locale.
ok_locales     all

# Strongly favour mail relayed by one of our own hosts: fires when a Received
# header contains "(<host>.test.com.cn [<address>])" and subtracts 100 points.
# NOTE(review): Received headers can be written by the sender; confirm that
# this pattern cannot be satisfied by a header added outside your own servers.
header     LOCAL_RCVD   Received =~ /.*\(\S+\.test\.com\.cn\s+\[.*\]\)/
score      LOCAL_RCVD   -100
describe   LOCAL_RCVD   Received from local machine

# Rules switched off by giving them a score of 0.
# NOTE(review): when the same rule is scored again further down the file, the
# later line wins; check that none of these rules is re-scored below.

score   HEADER_8BITS         0
score   HTML_COMMENT_8BITS   0
score   SUBJ_FULL_OF_8BITS   0
score   UPPERCASE_25_50      0
score   UPPERCASE_50_75      0
score   UPPERCASE_75_100     0

# Mail that claims to be relayed from our domain but not from our address.
#   __FROM_TEATIME     a Received header says "from test.com.cn"
#   __FROM_TEATIME_IP  a Received header carries our real address [12.34.56.78]
# The meta rule fires only when the domain is claimed AND our address is absent.
# It previously tested __FROM_TEATIME alone, which also penalised genuine mail
# relayed from the correct address. The dots in the domain are escaped so that
# they match a literal "." only.

header     __FROM_TEATIME              Received =~ /from test\.com\.cn/i
header     __FROM_TEATIME_IP           Received =~ /\[12\.34\.56\.78\]/
meta       FROM_TEATIME_BUT_IP_ERROR   (__FROM_TEATIME && !__FROM_TEATIME_IP)
describe   FROM_TEATIME_BUT_IP_ERROR   From test.com.cn but ip not match
score      FROM_TEATIME_BUT_IP_ERROR   11.0

# Addresses at our domain whose local part contains an 8-bit byte
# (e.g. <some chinese>@test.com.cn) in From, To or Cc.
# The From pattern used the class [\x80xff], which matches the byte 0x80 or the
# letters "x"/"f" and therefore hit ordinary ASCII addresses; it now uses the
# range [\x80-\xff] like the To and Cc patterns.

header     __FROM_8BIT_LOCAL   From:addr =~ /[a-zA-Z0-9_-]*[\x80-\xff][a-zA-Z0-9_-]*\@test\.com\.cn/i
header     __TO_8BIT_LOCAL     To:addr =~ /[a-zA-Z0-9_-]*[\x80-\xff][a-zA-Z0-9_-]*\@test\.com\.cn/i
header     __CC_8BIT_LOCAL     Cc:addr =~ /[a-zA-Z0-9_-]*[\x80-\xff][a-zA-Z0-9_-]*\@test\.com\.cn/i
meta       LOCAL_8BIT_USER     (__FROM_8BIT_LOCAL || __TO_8BIT_LOCAL || __CC_8BIT_LOCAL)
describe   LOCAL_8BIT_USER     From or To a chinese@test.com.cn
score      LOCAL_8BIT_USER     11.0

# Score overrides: address / wording rules and the Bayes probability bands.
score ADDRESS_IN_SUBJECT     11.00  # To: address appears in Subject
score ADDR_FREE              3.000  # From Address contains FREE
score BAD_ENC_HEADER         0.500  # Message has bad MIME encoding in the header
score BANG_MORE              5.000  # Talks about more with an exclamation!
score BILLION_DOLLARS        0.500  # Talks about lots of money
score BAYES_00               0.000  # Bayesian spam probability is 0 to 1%
score BAYES_05               0.000  # Bayesian spam probability is 1 to 5%
score BAYES_20               0.000  # Bayesian spam probability is 5 to 20%
score BAYES_40               0.500  # Bayesian spam probability is 20 to 40%
score BAYES_50               1.000  # Bayesian spam probability is 40 to 60%
score BAYES_60               1.000  # Bayesian spam probability is 60 to 80%
score BAYES_80               1.000  # Bayesian spam probability is 80 to 95%
score BAYES_95               1.000  # Bayesian spam probability is 95 to 99%
score BAYES_99               1.000  # Bayesian spam probability is 99 to 100%
# Score overrides: Date header sanity, sender DNS lists, domain and subject tags.
score DATE_IN_FUTURE_03_06   1.000  # Date: is 3 to 6 hours after Received: date
score DATE_IN_FUTURE_06_12   1.000  # Date: is 6 to 12 hours after Received: date
score DATE_IN_FUTURE_12_24   1.000  # Date: is 12 to 24 hours after Received: date
score DATE_IN_FUTURE_24_48   1.000  # Date: is 24 to 48 hours after Received: date
score DATE_IN_FUTURE_48_96   1.000  # Date: is 48 to 96 hours after Received: date
score DATE_IN_FUTURE_96_XX   1.000  # Date: is 96 hours or more after Received: date
score DATE_IN_PAST_03_06     1.000  # Date: is 3 to 6 hours before Received: date
score DATE_IN_PAST_06_12     1.000  # Date: is 6 to 12 hours before Received: date
score DATE_IN_PAST_12_24     1.000  # Date: is 12 to 24 hours before Received: date
score DATE_IN_PAST_24_48     1.000  # Date: is 24 to 48 hours before Received: date
score DATE_IN_PAST_48_96     1.000  # Date: is 48 to 96 hours before Received: date
score DATE_IN_PAST_96_XX     1.000  # Date: is 96 hours or more before Received: date
score DATE_SPAMWARE_Y2K      1.000  # Date header uses unusual Y2K formatting
score DNS_FROM_AHBL_RHSBL    0.000  # From: sender listed in dnsbl.ahbl.org
score DNS_FROM_RFC_ABUSE     0.000  # Envelope sender in abuse.rfc-ignorant.org
score DNS_FROM_RFC_BOGUSMX   0.000  # Envelope sender in bogusmx.rfc-ignorant.org
score DNS_FROM_RFC_DSN       0.000  # Envelope sender in dsn.rfc-ignorant.org
score DNS_FROM_RFC_POST      0.000  # Envelope sender in postmaster.rfc-ignorant.org
score DNS_FROM_RFC_WHOIS     0.000  # Envelope sender in whois.rfc-ignorant.org
score DNS_FROM_SECURITYSAGE  1.000  # Envelope sender in blackholes.securitysage.com
score DOMAIN_4U2             2.000  # Domain name containing a "4u" variant
score DOMAIN_RATIO           3.000  # Message body mentions many internet domains
score EMPTY_MESSAGE          1.000  # Message appears to be empty with no Subject: text
score ENGLISH_UCE_SUBJECT    1.400  # Subject contains an English UCE tag
# Score overrides: forged headers / mail clients, From: oddities and HELO checks.
score FORGED_HOTMAIL_RCVD    1.000  # Forged hotmail.com 'Received:' header found
score FORGED_HOTMAIL_RCVD2   1.000  # hotmail.com 'From' address, but no 'Received:'
score FORGED_MSGID_AOL       1.500  # Message-ID is forged, (aol.com)
score FORGED_MSGID_EXCITE    1.500  # Message-ID is forged, (excite.com)
score FORGED_MSGID_HOTMAIL   1.500  # Message-ID is forged, (hotmail.com)
score FORGED_MSGID_MSN       1.500  # Message-ID is forged, (msn.com)
score FORGED_MSGID_YAHOO     1.500  # Message-ID is forged, (yahoo.com)
score FORGED_MUA_AOL_FROM    1.500  # Forged mail pretending to be from AOL (by From)
score FORGED_MUA_EUDORA      1.500  # Forged mail pretending to be from Eudora
score FORGED_MUA_IMS         1.500  # Forged mail pretending to be from IMS
score FORGED_MUA_MOZILLA     1.500  # Forged mail pretending to be from Mozilla
score FORGED_MUA_OIMO        1.500  # Forged mail pretending to be from MS Outlook IMO
score FORGED_MUA_OUTLOOK     1.500  # Forged mail pretending to be from MS Outlook
score FORGED_MUA_THEBAT_BOUN 2.000  # Mail pretending to be from The Bat! (boundary)
score FORGED_MUA_THEBAT_CS   1.500  # Mail pretending to be from The Bat! (charset)
score FORGED_OUTLOOK_HTML    1.500  # Outlook can't send HTML message only
score FORGED_OUTLOOK_TAGS    1.000  # Outlook can't send HTML in this format
score FROM_ALL_NUMS          1.500  # From numeric address (except US/Canada phones)
score FROM_BLANK_NAME        1.500  # From: contains empty name
score FROM_DOMAIN_NOVOWEL    1.500  # From: domain has series of non-vowel letters
score FROM_ENDS_IN_NUMS      1.000  # From: ends in many numbers
score FROM_ILLEGAL_CHARS     1.000  # From: has too many raw illegal characters
score GAPPY_SUBJECT          3.000  # Subject: contains G.a.p.p.y-T.e.x.t
score HEAD_ILLEGAL_CHARS     0.500  # Headers have too many raw illegal characters
score HELO_DYNAMIC_IPADDR2   2.000  # Relay HELO'd using suspicious hostname (IP addr 2)
# Score overrides: HTML content rules.
score HTML_00_10             0.000  # Message is 0% to 10% HTML
score HTML_10_20             0.000  # Message is 10% to 20% HTML
score HTML_20_30             0.000  # Message is 20% to 30% HTML
score HTML_30_40             0.000  # Message is 30% to 40% HTML
score HTML_40_50             0.000  # Message is 40% to 50% HTML
score HTML_50_60             0.000  # Message is 50% to 60% HTML
score HTML_60_70             0.000  # Message is 60% to 70% HTML
score HTML_70_80             0.000  # Message is 70% to 80% HTML
score HTML_80_90             0.000  # Message is 80% to 90% HTML
score HTML_90_100            0.000  # Message is 90% to 100% HTML
score HTML_FONT_BIG          0.000  # HTML tag for a big font size
score HTML_FONT_FACE_BAD     0.000  # HTML font face is not a word
score HTML_IMAGE_ONLY_04     0.500  # HTML: images with 0-400 bytes of words
score HTML_IMAGE_ONLY_08     0.400  # HTML: images with 400-800 bytes of words
score HTML_IMAGE_ONLY_12     1.000  # HTML: images with 800-1200 bytes of words
score HTML_IMAGE_ONLY_16     1.000  # HTML: images with 1200-1600 bytes of words
score HTML_IMAGE_ONLY_20     1.000  # HTML: images with 1600-2000 bytes of words
score HTML_IMAGE_ONLY_24     1.000  # HTML: images with 2000-2400 bytes of words
score HTML_IMAGE_ONLY_28     1.000  # HTML: images with 2400-2800 bytes of words
score HTML_IMAGE_ONLY_32     1.000  # HTML: images with 2800-3200 bytes of words
score HTML_IMAGE_RATIO_02    2.000  # HTML has a low ratio of text to image area
score HTML_IMAGE_RATIO_04    2.000  # HTML has a low ratio of text to image area
score HTML_IMAGE_RATIO_06    2.000  # HTML has a low ratio of text to image area
score HTML_IMAGE_RATIO_08    2.000  # HTML has a low ratio of text to image area
score HTML_MESSAGE           0.000  # HTML included in message
score HTML_MIME_NO_HTML_TAG  3.000  # HTML-only message, but there is no HTML tag
score HTML_MISSING_CTYPE     3.000  # Message is HTML without HTML Content-Type
score HTML_NONELEMENT_00_10  0.000  # 0% to 10% of HTML elements are non-standard
score HTML_NONELEMENT_10_20  0.500  # 10% to 20% of HTML elements are non-standard
score HTML_NONELEMENT_20_30  0.000  # 20% to 30% of HTML elements are non-standard
score HTML_NONELEMENT_30_40  0.500  # 30% to 40% of HTML elements are non-standard
score HTML_NONELEMENT_40_50  0.000  # 40% to 50% of HTML elements are non-standard
score HTML_NONELEMENT_50_60  0.500  # 50% to 60% of HTML elements are non-standard
score HTML_NONELEMENT_60_70  0.000  # 60% to 70% of HTML elements are non-standard
score HTML_NONELEMENT_70_80  0.500  # 70% to 80% of HTML elements are non-standard
score HTML_NONELEMENT_80_90  0.000  # 80% to 90% of HTML elements are non-standard
score HTML_NONELEMENT_90_100 0.500  # 90% to 100% of HTML elements are non-standard
score HTML_OBFUSCATE_05_10   0.600  # Message is 5% to 10% HTML obfuscation
score HTML_OBFUSCATE_10_20   0.500  # Message is 10% to 20% HTML obfuscation
score HTML_OBFUSCATE_20_30   1.000  # Message is 20% to 30% HTML obfuscation
score HTML_OBFUSCATE_30_40   1.000  # Message is 30% to 40% HTML obfuscation
score HTML_OBFUSCATE_40_50   1.000  # Message is 40% to 50% HTML obfuscation
score HTML_OBFUSCATE_50_60   1.500  # Message is 50% to 60% HTML obfuscation
score HTML_OBFUSCATE_60_70   1.500  # Message is 60% to 70% HTML obfuscation
score HTML_OBFUSCATE_70_80   1.000  # Message is 70% to 80% HTML obfuscation
score HTML_OBFUSCATE_80_90   1.000  # Message is 80% to 90% HTML obfuscation
score HTML_OBFUSCATE_90_100  1.000  # Message is 90% to 100% HTML obfuscation
score HTML_SHORT_LINK_IMG_1  2.000  # HTML is very short with a linked image
score HTML_SHORT_LINK_IMG_2  2.000  # HTML is very short with a linked image
score HTML_SHORT_LINK_IMG_3  0.500  # HTML is very short with a linked image
score HTML_TAG_EXIST_BGSOUND 0.500  # HTML has "bgsound" tag
score HTML_TAG_EXIST_MARQUEE 0.500  # HTML has "marquee" tag
score HTML_TAG_EXIST_TBODY   0.500  # HTML has "tbody" tag
score HTML_TEXT_AFTER_BODY   0.500  # HTML contains text after BODY close tag
score HTML_TEXT_AFTER_HTML   0.500  # HTML contains text after HTML close tag
# Score overrides: invalid headers, MIME structure, Message-Id, bulk-mailer
# fingerprints and Received: checks.
score INVALID_DATE           0.500  # Invalid Date: header (not RFC 2822)
score INVALID_MSGID          0.500  # Message-Id is not valid, according to RFC 2822
score INVALID_TZ_CST         0.500  # Invalid date in header (wrong CST timezone)
score INVALID_TZ_EST         0.500  # Invalid date in header (wrong EST timezone)
score INVALID_TZ_GMT         0.500  # Invalid date in header (wrong GMT/UTC timezone)
score MAILTO_TO_SPAM_ADDR    0.200  # Includes a link to a likely spammer email
score MIME_BASE64_NO_NAME    0.500  # base64 attachment does not have a file name
score MIME_BASE64_TEXT       0.500  # Message text disguised using base64 encoding
score MIME_HTML_ONLY         0.500  # Message only has text/html MIME parts
score MIME_HTML_ONLY_MULTI   0.000  # Multipart message only has text/html MIME parts
score MIME_HEADER_CTYPE_ONLY 0.000  # 'Content-Type' found without required MIME headers
score MISSING_MIMEOLE        0.500  # Message has X-MSMail-Priority, but no X-MimeOLE
score MISSING_SUBJECT        1.000  # Missing Subject: header
score MSGID_DOLLARS          3.000  # Message-Id has pattern used in spam
score MSGID_FROM_MTA_ID      0.500  # Message-Id for external message added locally
score MSGID_OUTLOOK_INVALID  0.500  # Message-Id is fake (in Outlook Express format)
score NO_DNS_FOR_FROM        0.500  # Envelope sender has no MX or A DNS records
score NO_REAL_NAME           1.000  # From: does not include a real name
score PLING_PLING            1.000  # Subject has lots of exclamation marks
score RATWARE_MS_HASH        3.000  # Bulk email fingerprint (msgid ms hash) found
score RATWARE_RCVD_AT        3.000  # Bulk email fingerprint (Received @) found
score RATWARE_RCVD_LC_ESMTP  1.500  # Bulk email fingerprint ('esmtp' Received) found
score RATWARE_RCVD_PF        3.000  # Bulk email fingerprint (Received PF) found
score RATWARE_ZERO_TZ        3.000  # Bulk email fingerprint (+0000) found
score RCVD_DOUBLE_IP_SPAM    0.500  # Bulk email fingerprint (double IP) found
score RCVD_HELO_IP_MISMATCH  1.000  # Received: HELO and IP do not match, but should
score RCVD_ILLEGAL_IP        1.000  # Received: contains illegal IP address
score RCVD_NUMERIC_HELO      1.000  # Received: contains an IP address used for HELO
# Score overrides: SPF results. A hard fail alone is worth 11 points.
score SPF_FAIL               11.00  # SPF: sender does not match SPF record (fail)
score SPF_HELO_FAIL          11.00  # SPF: HELO does not match SPF record (fail)
score SPF_HELO_NEUTRAL       1.000  # SPF: HELO does not match SPF record (neutral)
score SPF_HELO_PASS          0.000  # SPF: HELO matches SPF record
score SPF_HELO_SOFTFAIL      0.000  # SPF: HELO does not match SPF record (softfail)
score SPF_NEUTRAL            1.000  # SPF: sender does not match SPF record (neutral)
score SPF_PASS               0.000  # SPF: sender matches SPF record
score SPF_SOFTFAIL           0.500  # SPF: sender does not match SPF record (softfail)
# Score overrides: Subject-line rules and the empty To: check.
score SUBJECT_DIET           1.812  # Subject talks about losing pounds
score SUBJECT_DRUG_GAP_C     3.000  # Subject contains a gappy version of 'cialis'
score SUBJECT_DRUG_GAP_L     3.000  # Subject contains a gappy version of 'levitra'
score SUBJECT_DRUG_GAP_P     3.000  # Subject contains a gappy version of 'phentermine'
score SUBJECT_DRUG_GAP_S     3.000  # Subject contains a gappy version of 'soma'
score SUBJECT_DRUG_GAP_VA    3.000  # Subject contains a gappy version of 'valium'
score SUBJECT_DRUG_GAP_VIC   3.000  # Subject contains a gappy version of 'vicodin'
score SUBJECT_DRUG_GAP_X     3.000  # Subject contains a gappy version of 'xanax'
score SUBJECT_ENCODED_TWICE  2.000  # Subject: MIME encoded twice
score SUBJECT_EXCESS_BASE64  0.782  # Subject: base64 encoded unnecessarily
score SUBJECT_EXCESS_QP      0.000  # Subject: quoted-printable encoded unnecessarily
score SUBJECT_FUZZY_CHEAP    3.000  # Attempt to obfuscate words in Subject:
score SUBJECT_FUZZY_MEDS     3.000  # Attempt to obfuscate words in Subject:
score SUBJECT_FUZZY_PENIS    3.000  # Attempt to obfuscate words in Subject:
score SUBJECT_FUZZY_TION     3.000  # Attempt to obfuscate words in Subject:
score SUBJECT_NOVOWEL        0.000  # Subject: has long non-vowel letter sequence
score SUBJECT_SEXUAL         2.160  # Subject indicates sexually-explicit content
score SUBJ_2_NUM_PARENS      0.952  # Subject contains common spam sign (2 numbers)
score SUBJ_ALL_CAPS          2.000  # Subject is all capitals
score SUBJ_AS_SEEN           3.000  # Subject contains "As Seen"
score SUBJ_BUY               2.000  # Subject line starts with Buy or Buying
score SUBJ_CONSONANTS        0.000  # Subject contains consecutive consonants in "word"
score SUBJ_DOLLARS           0.650  # Subject starts with dollar amount
score SUBJ_FOR_ONLY          1.500  # Subject contains "For Only"
score SUBJ_FREE_CAP          1.200  # Subject contains "FREE" in CAPS
score SUBJ_GUARANTEED        1.360  # Subject GUARANTEED
score SUBJ_HAS_SPACES        1.000  # Subject contains lots of white space
score SUBJ_HAS_UNIQ_ID       0.895  # Subject contains a unique ID
score SUBJ_ILLEGAL_CHARS     1.000  # Subject: has too many raw illegal characters
score SUBJ_LIFE_INSURANCE    11.00  # Subject includes "life insurance"
score SUBJ_YOUR_DEBT         11.00  # Subject contains "Your Bills" or similar
score SUBJ_YOUR_FAMILY       11.00  # Subject contains "Your Family"
score SUBJ_YOUR_OWN          4.000  # Subject contains "Your Own"
score SUB_FREE_OFFER         1.000  # Subject starts with "Free"
score SUB_HELLO              1.500  # Subject starts with "Hello"
score TO_EMPTY               11.00  # To: is empty
# Score overrides: relay parsing, uppercase ratio, URI blocklists, quoting and
# the user black-/white-lists.
# NOTE(review): the UPPERCASE_* rules are also scored 0 under "Disabled scores"
# earlier in this file; the later lines here take effect. Confirm which of the
# two settings is intended.
score UNPARSEABLE_RELAY      0.000  # Informational: message has unparseable relay lines
score UPPERCASE_25_50        0.500  # message body is 25-50% uppercase
score UPPERCASE_50_75        1.000  # message body is 50-75% uppercase
score UPPERCASE_75_100       1.500  # message body is 75-100% uppercase
score URIBL_AB_SURBL         0.000  # Contains an URL listed in the AB SURBL blocklist
score URIBL_JP_SURBL         0.000  # Contains an URL listed in the JP SURBL blocklist
score URIBL_OB_SURBL         0.000  # Contains an URL listed in the OB SURBL blocklist
score URIBL_PH_SURBL         0.000  # Contains an URL listed in the PH SURBL blocklist
score URIBL_SBL              0.000  # Contains an URL listed in the SBL blocklist
score URIBL_SC_SURBL         0.000  # Contains an URL listed in the SC SURBL blocklist
score URIBL_WS_SURBL         0.000  # Contains an URL listed in the WS SURBL blocklist
score URI_SCHEME_MIXED_CASE  1.500  # URI scheme has mixed uppercase and lowercase
score WEIRD_QUOTING          1.000  # Weird repeated double-quotation marks
score USER_IN_BLACKLIST      20.00  # From: address is in the user's black-list
score USER_IN_WHITELIST      -80.0  # From: address is in the user's white-list

TOP

感谢楼主的无私奉献
多吃青菜.不食肉鱼虾蛋奶蜜
素食环保敬老勤俭
群:4161619

TOP

刚看到,收下,谢了,不知道老大有没有再次更新?呵呵

TOP

这样的文件还是打包提供下载比较好,直接复制粘贴很容易出错。

TOP

不错,试用一下,谢了

TOP

发新话题