|
|
@@ -0,0 +1,218 @@
|
|
|
+package com.zhentao.utils;
|
|
|
+
|
|
|
+import java.util.*;
|
|
|
+import java.util.concurrent.ConcurrentHashMap;
|
|
|
+import java.util.regex.Pattern;
|
|
|
+
|
|
|
/**
 * Heuristic filter that decides whether a chat message (taken together with the
 * sender's recent messages) looks like an attempt to exchange contact details
 * such as a phone number or a WeChat/QQ account.
 *
 * <p>A message is rejected when any of the following holds:
 * <ol>
 *   <li>it is a letter/digit style message that exceeds {@link #MAX_SINGLE_LEN};</li>
 *   <li>it continues a run of consecutive non-{@code OTHER} messages that reaches
 *       the applicable threshold;</li>
 *   <li>the concatenation of the sender's recent accepted messages plus this one
 *       matches the phone/WeChat patterns, or contains a sensitive keyword
 *       together with something that looks like an account id.</li>
 * </ol>
 *
 * <p>Only accepted messages are remembered, so one rejected message does not
 * cause later harmless messages to be rejected as well.
 *
 * <p>Access to each sender's message window is serialized on that window, so
 * {@link #hasContact(String, String)} may be called from several threads.
 *
 * <p>NOTE(review): the per-user cache is never evicted, so it grows with the
 * number of distinct user ids seen; consider an expiry policy.
 */
public class ContactFilter {

    /** Coarse classification of one lower-cased, trimmed message. */
    private enum MsgType {
        /** ASCII letters only. */
        PURE_LETTER,
        /** At least one ASCII letter and one ASCII digit (other characters may be present). */
        LETTER_DIGIT,
        /** ASCII digits only. */
        PURE_DIGIT,
        /** Digits mixed with non-alphanumeric text (e.g. Chinese), and no ASCII letters. */
        DIGIT_TEXT_MIX,
        /** Everything else (symbols only, Chinese only, empty, ...). */
        OTHER
    }

    /** One accepted message kept in a sender's session window. */
    private static class MsgInfo {
        final String content;
        final MsgType type;
        /** Number of digit characters in {@link #content}; stored but not read at present. */
        final int digitLength;

        MsgInfo(String content, MsgType type, int digitLength) {
            this.content = content;
            this.type = type;
            this.digitLength = digitLength;
        }
    }

    // -------------------------- session configuration --------------------------

    /** Recent accepted messages per user id, oldest first. */
    private static final Map<String, Deque<MsgInfo>> USER_SESSION_CACHE = new ConcurrentHashMap<>();

    /** Maximum number of messages kept per user. */
    private static final int MAX_SESSION_MSG_COUNT = 15;

    /** Single-message limit: anything longer than this is rejected outright. */
    private static final int MAX_SINGLE_LEN = 4;

    /** Run length that triggers rejection for digit+text messages with at most 3 digits. */
    private static final int CONSECUTIVE_TIMES_LOW = 3;

    /** Run length that triggers rejection for every other non-OTHER message. */
    private static final int CONSECUTIVE_TIMES_HIGH = 2;

    // -------------------------- patterns / keywords --------------------------

    private static final Pattern PHONE_PATTERN = Pattern.compile(
            "(?:(?:\\+|00)86)?1[3-9]\\d{9}|\\d{10,11}"
    );

    // NOTE(review): input is lower-cased before matching, so the upper-case "V"
    // alternative below can never match. Lower-casing it would also match ordinary
    // words that contain "v" followed by six or more letters, so it is left as is
    // pending a product decision.
    private static final Pattern WECHAT_PATTERN = Pattern.compile(
            "(?:微信|微|V|vx|wx|绿泡泡|联系方式)[::\\s]*[a-zA-Z0-9][a-zA-Z0-9_]{5,19}"
    );

    /** Something that looks like an account id: 6+ alphanumerics or 4+ digits. */
    private static final Pattern ACCOUNT_PATTERN = Pattern.compile("[a-zA-Z0-9]{6,}|\\d{4,}");

    /**
     * Keywords that, combined with an account-like token, indicate contact exchange.
     * All entries must be lower case because they are compared against lower-cased text.
     */
    private static final Set<String> SENSITIVE_KEYWORDS = Collections.unmodifiableSet(
            new HashSet<>(Arrays.asList(
                    "加我", "联系方式", "私聊", "企鹅号", "qq",
                    "抖音号", "快手号", "链接", "二维码", "私信",
                    "wx", "vx", "dy")));

    // -------------------------- main entry point --------------------------

    /**
     * Decides whether {@code text}, sent by {@code userId}, should be rejected as a
     * contact-exchange attempt.
     *
     * @param userId sender id used to look up the session window; when {@code null}
     *               the message is evaluated on its own without any session context
     * @param text   raw message text; {@code null} or empty is never rejected
     * @return {@code true} if the message should be rejected, {@code false} otherwise
     */
    public static boolean hasContact(String userId, String text) {
        if (text == null || text.isEmpty()) {
            return false;
        }
        // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
        String lowerText = text.toLowerCase(Locale.ROOT).trim();

        int digitLength = extractDigitLength(lowerText);
        MsgType currentType = getMsgType(lowerText, digitLength);

        // Single-message limits.
        if (exceedsSingleLimit(currentType, lowerText.length(), digitLength)) {
            return true;
        }

        if (userId == null) {
            // No session to consult or update: judge the message in isolation.
            return containsContact(lowerText);
        }

        Deque<MsgInfo> sessionMsgs =
                USER_SESSION_CACHE.computeIfAbsent(userId, k -> new ArrayDeque<>());
        synchronized (sessionMsgs) {
            // Consecutive-run limit.
            if (isConsecutiveOverLimit(sessionMsgs, currentType, digitLength)) {
                return true;
            }

            // Contact detection over the window this message would produce.
            String sessionText = buildSessionText(sessionMsgs, lowerText);
            if (containsContact(sessionText)) {
                return true;
            }

            // Commit only accepted messages so a rejected one cannot poison the session.
            sessionMsgs.addLast(new MsgInfo(lowerText, currentType, digitLength));
            while (sessionMsgs.size() > MAX_SESSION_MSG_COUNT) {
                sessionMsgs.removeFirst();
            }
            return false;
        }
    }

    // -------------------------- helpers --------------------------

    /**
     * Applies the single-message limit: letter, letter+digit and digit messages are
     * limited by total length, digit+text messages by their digit count.
     */
    private static boolean exceedsSingleLimit(MsgType type, int textLength, int digitLength) {
        switch (type) {
            case PURE_LETTER:
            case LETTER_DIGIT:
            case PURE_DIGIT:
                return textLength > MAX_SINGLE_LEN;
            case DIGIT_TEXT_MIX:
                return digitLength > MAX_SINGLE_LEN;
            default:
                return false;
        }
    }

    /** Returns whether {@code text} matches the phone/WeChat patterns or keyword+account rule. */
    private static boolean containsContact(String text) {
        return PHONE_PATTERN.matcher(text).find()
                || WECHAT_PATTERN.matcher(text).find()
                || isSensitiveWithAccount(text);
    }

    /**
     * Classifies a message by scanning its characters once.
     *
     * <p>A character scan is used instead of {@code String.matches(".*x.*")} because
     * {@code .} does not match line terminators, which let multi-line messages fall
     * through to {@link MsgType#OTHER} and skip the length limits.
     *
     * @param text        lower-cased, trimmed message
     * @param digitLength number of digit characters in {@code text}
     */
    private static MsgType getMsgType(String text, int digitLength) {
        boolean hasLetter = false;
        boolean hasAsciiDigit = false;
        boolean hasOther = false;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
                hasLetter = true;
            } else if (c >= '0' && c <= '9') {
                hasAsciiDigit = true;
            } else {
                hasOther = true;
            }
        }

        if (hasLetter && !hasAsciiDigit && !hasOther) {
            return MsgType.PURE_LETTER;
        }
        if (hasAsciiDigit && !hasLetter && !hasOther) {
            return MsgType.PURE_DIGIT;
        }
        if (hasLetter && hasAsciiDigit) {
            return MsgType.LETTER_DIGIT;
        }
        if (digitLength > 0 && hasOther) {
            // Digits plus non-alphanumeric characters (e.g. Chinese).
            return MsgType.DIGIT_TEXT_MIX;
        }
        return MsgType.OTHER;
    }

    /** Counts the characters of {@code text} for which {@link Character#isDigit(char)} holds. */
    private static int extractDigitLength(String text) {
        int digitLen = 0;
        for (int i = 0; i < text.length(); i++) {
            if (Character.isDigit(text.charAt(i))) {
                digitLen++;
            }
        }
        return digitLen;
    }

    /**
     * Checks whether the current message would extend a run of consecutive
     * non-{@code OTHER} messages up to the applicable threshold.
     *
     * <ul>
     *   <li>{@code DIGIT_TEXT_MIX} with at most 3 digits: run of 3 is rejected;</li>
     *   <li>any other non-{@code OTHER} message: run of 2 is rejected.</li>
     * </ul>
     *
     * The run counts the current message plus the most recent stored messages,
     * stopping at the first {@code OTHER} message. Previous messages of any
     * non-{@code OTHER} type count towards the run.
     *
     * @param sessionMsgs     the sender's stored messages, oldest first; caller holds its lock
     * @param currentType     type of the current message
     * @param currentDigitLen digit count of the current message
     */
    private static boolean isConsecutiveOverLimit(Deque<MsgInfo> sessionMsgs,
                                                  MsgType currentType,
                                                  int currentDigitLen) {
        if (currentType == MsgType.OTHER) {
            return false;
        }

        int threshold = (currentType == MsgType.DIGIT_TEXT_MIX && currentDigitLen <= 3)
                ? CONSECUTIVE_TIMES_LOW
                : CONSECUTIVE_TIMES_HIGH;

        int consecutiveCount = 1; // the current message itself
        Iterator<MsgInfo> newestFirst = sessionMsgs.descendingIterator();
        while (newestFirst.hasNext()) {
            if (newestFirst.next().type == MsgType.OTHER) {
                break; // an OTHER message ends the run
            }
            consecutiveCount++;
            if (consecutiveCount >= threshold) {
                return true;
            }
        }
        return false;
    }

    /**
     * Concatenates, without separators, the messages that would form the session
     * window if {@code content} were appended: the newest stored messages (at most
     * {@code MAX_SESSION_MSG_COUNT - 1} of them) followed by {@code content}.
     * The stored window is not modified.
     */
    private static String buildSessionText(Deque<MsgInfo> sessionMsgs, String content) {
        int skip = Math.max(0, sessionMsgs.size() + 1 - MAX_SESSION_MSG_COUNT);
        StringBuilder sb = new StringBuilder();
        for (MsgInfo msg : sessionMsgs) {
            if (skip > 0) {
                skip--;
                continue;
            }
            sb.append(msg.content);
        }
        return sb.append(content).toString();
    }

    /**
     * Returns whether {@code text} contains both an account-like token and at least
     * one sensitive keyword.
     */
    private static boolean isSensitiveWithAccount(String text) {
        if (!ACCOUNT_PATTERN.matcher(text).find()) {
            return false;
        }
        for (String keyword : SENSITIVE_KEYWORDS) {
            if (text.contains(keyword)) {
                return true;
            }
        }
        return false;
    }

    // -------------------------- manual demo of the run-length rules --------------------------

    public static void main(String[] args) {
        String userId = "testUser001";
        USER_SESSION_CACHE.clear();

        // Digit+text messages with one digit each: the third in a row is rejected.
        System.out.println("第1次:1吃了没 → " + hasContact(userId, "1吃了没")); // false (run of 1)
        System.out.println("第2次:6睡了没 → " + hasContact(userId, "6睡了没")); // false (run of 2)
        System.out.println("第3次:6死了没 → " + hasContact(userId, "6死了没")); // true (run of 3)

        // Reset, then digit+text messages with four digits: the second in a row is rejected.
        USER_SESSION_CACHE.clear();
        System.out.println("--- 重置缓存 ---");
        System.out.println("第1次:1234吃了没 → " + hasContact(userId, "1234吃了没")); // false (run of 1)
        System.out.println("第2次:5678睡了没 → " + hasContact(userId, "5678睡了没")); // true (run of 2)

        // More than four digits in one message is rejected outright.
        System.out.println("单条数字超4:12345吃了没 → " + hasContact(userId, "12345吃了没")); // true
    }
}
|