|
| 1 | +/** |
| 2 | + * HeuristicClassifier — multi-port, extensible traffic classifier. |
| 3 | + * |
| 4 | + * Analyses inbound connection events across any of the server's public ports |
| 5 | + * and produces a Classification with a threat score (0–100) and findings list. |
| 6 | + * |
| 7 | + * Extensibility model: |
| 8 | + * • Extend AbstractHeuristic to add new scoring rules (inherits port-set, findings API). |
| 9 | + * • Implement IHeuristicModule to plug in entirely independent classification modules. |
| 10 | + * • Register both kinds via HeuristicClassifier.register(…). |
| 11 | + * |
| 12 | + * Built-in rules: |
| 13 | + * • Repetition / rapid connection from a single IP (rate heuristic). |
| 14 | + * • Geo-location concentration — many connections from same country. |
| 15 | + * • Port scan detection — same IP hitting multiple distinct ports. |
| 16 | + * • Basic payload pattern flags (known bad keywords). |
| 17 | + * |
| 18 | + * @author Max Rupplin |
| 19 | + * @date June 08 2026 EST |
| 20 | + */ |
| 21 | +package heuristics; |
| 22 | + |
| 23 | +import java.time.Instant; |
| 24 | +import java.util.*; |
| 25 | +import java.util.concurrent.ConcurrentHashMap; |
| 26 | +import java.util.concurrent.CopyOnWriteArrayList; |
| 27 | + |
| 28 | +public class HeuristicClassifier |
| 29 | +{ |
| 30 | + // ── Known public ports ──────────────────────────────────────────────────── |
| 31 | + public static final Set<Integer> PUBLIC_PORTS = Set.of( |
| 32 | + 49152, // WEBEXPRESS base / telnet proxy |
| 33 | + 5512, // AES2 WebExpress |
| 34 | + 6682, // Bitcoin WebExpress |
| 35 | + 49155, // ConnectionStatusServer |
| 36 | + 49188 // ModuleLoaderDaemon |
| 37 | + // RSA / DSA ports resolved at runtime via NitroWebExpress.Aspect constants |
| 38 | + ); |
| 39 | + |
| 40 | + // ── Thresholds ──────────────────────────────────────────────────────────── |
| 41 | + private static final int RATE_WINDOW_SECS = 60; |
| 42 | + private static final int RATE_LIMIT = 20; // connections / window / IP |
| 43 | + private static final int GEO_CONCENTRATION = 30; // % of total before flagging |
| 44 | + private static final int PORT_SCAN_THRESHOLD = 3; // distinct ports before flagging |
| 45 | + |
| 46 | + // ── Registered extension modules ────────────────────────────────────────── |
| 47 | + private static final List<IHeuristicModule> modules = new CopyOnWriteArrayList<>(); |
| 48 | + |
| 49 | + // ── Per-IP & per-country connection tracking (in-memory, rolling) ───────── |
| 50 | + // Structure: IP -> list of epoch-second timestamps |
| 51 | + private final Map<String, List<Long>> ipTimestamps = new ConcurrentHashMap<>(); |
| 52 | + // Structure: IP -> set of ports seen |
| 53 | + private final Map<String, Set<Integer>> ipPorts = new ConcurrentHashMap<>(); |
| 54 | + // Structure: countryCode -> count |
| 55 | + private final Map<String, Integer> countryCount = new ConcurrentHashMap<>(); |
| 56 | + private int totalConnections = 0; |
| 57 | + |
| 58 | + // ───────────────────────────────────────────────────────────────────────── |
| 59 | + // Extensibility model |
| 60 | + // ───────────────────────────────────────────────────────────────────────── |
| 61 | + |
| 62 | + /** |
| 63 | + * Implement this interface (keyword: implements IHeuristicModule) to register |
| 64 | + * a fully independent scoring module that receives every connection event. |
| 65 | + */ |
| 66 | + public interface IHeuristicModule |
| 67 | + { |
| 68 | + /** Called once per inbound connection event; add findings and return score delta (0–100). */ |
| 69 | + int evaluate(ConnectionEvent event, List<String> findings); |
| 70 | + String moduleName(); |
| 71 | + } |
| 72 | + |
| 73 | + /** |
| 74 | + * Extend this class (keyword: extends AbstractHeuristic) to build a reusable |
| 75 | + * heuristic that has built-in access to port membership helpers and the findings API. |
| 76 | + */ |
| 77 | + public static abstract class AbstractHeuristic implements IHeuristicModule |
| 78 | + { |
| 79 | + /** Returns true if the event arrived on one of the known public ports. */ |
| 80 | + protected boolean isPublicPort(final ConnectionEvent event) |
| 81 | + { |
| 82 | + return PUBLIC_PORTS.contains(event.port); |
| 83 | + } |
| 84 | + |
| 85 | + /** Convenience: add a WARNING finding and return a score penalty. */ |
| 86 | + protected int warn(final String message, final int penalty, final List<String> findings) |
| 87 | + { |
| 88 | + findings.add("WARN " + message); |
| 89 | + return penalty; |
| 90 | + } |
| 91 | + |
| 92 | + /** Convenience: add a PASS finding with zero penalty. */ |
| 93 | + protected int pass(final String message, final List<String> findings) |
| 94 | + { |
| 95 | + findings.add("PASS " + message); |
| 96 | + return 0; |
| 97 | + } |
| 98 | + } |
| 99 | + |
| 100 | + /** Register an IHeuristicModule or AbstractHeuristic (both implement the interface). */ |
| 101 | + public static void register(final IHeuristicModule module) |
| 102 | + { |
| 103 | + if (module == null) throw new IllegalArgumentException("null module"); |
| 104 | + modules.add(module); |
| 105 | + } |
| 106 | + |
| 107 | + // ───────────────────────────────────────────────────────────────────────── |
| 108 | + // Public API |
| 109 | + // ───────────────────────────────────────────────────────────────────────── |
| 110 | + |
| 111 | + /** |
| 112 | + * Classify a single inbound connection event. |
| 113 | + * |
| 114 | + * @param event the connection to analyse |
| 115 | + * @return Classification with threat score and findings |
| 116 | + */ |
| 117 | + public Classification classify(final ConnectionEvent event) |
| 118 | + { |
| 119 | + Objects.requireNonNull(event, "event must not be null"); |
| 120 | + |
| 121 | + List<String> findings = new ArrayList<>(); |
| 122 | + int score = 0; |
| 123 | + |
| 124 | + // ── 1. Port membership check ────────────────────────────────────────── |
| 125 | + if (!PUBLIC_PORTS.contains(event.port)) |
| 126 | + { |
| 127 | + findings.add("INFO port " + event.port + " not in known public-port set"); |
| 128 | + } |
| 129 | + else |
| 130 | + { |
| 131 | + findings.add("PASS recognised public port " + event.port); |
| 132 | + } |
| 133 | + |
| 134 | + // ── 2. IP repetition / rate heuristic ──────────────────────────────── |
| 135 | + score += checkIpRate(event, findings); |
| 136 | + |
| 137 | + // ── 3. Port-scan detection ──────────────────────────────────────────── |
| 138 | + score += checkPortScan(event, findings); |
| 139 | + |
| 140 | + // ── 4. Geo-location concentration ──────────────────────────────────── |
| 141 | + score += checkGeoConcentration(event, findings); |
| 142 | + |
| 143 | + // ── 5. Payload keyword scan ─────────────────────────────────────────── |
| 144 | + score += checkPayload(event, findings); |
| 145 | + |
| 146 | + // ── 6. Extension modules ────────────────────────────────────────────── |
| 147 | + for (IHeuristicModule module : modules) |
| 148 | + { |
| 149 | + try |
| 150 | + { |
| 151 | + int delta = module.evaluate(event, findings); |
| 152 | + score += Math.max(0, Math.min(delta, 100)); |
| 153 | + findings.add("MOD [" + module.moduleName() + "] returned delta=" + delta); |
| 154 | + } |
| 155 | + catch (Exception e) |
| 156 | + { |
| 157 | + findings.add("ERR [" + module.moduleName() + "] threw: " + e.getMessage()); |
| 158 | + } |
| 159 | + } |
| 160 | + |
| 161 | + // Update tracking state |
| 162 | + recordConnection(event); |
| 163 | + |
| 164 | + return new Classification(event, Math.min(score, 100), List.copyOf(findings)); |
| 165 | + } |
| 166 | + |
| 167 | + // ───────────────────────────────────────────────────────────────────────── |
| 168 | + // Built-in heuristic rules |
| 169 | + // ───────────────────────────────────────────────────────────────────────── |
| 170 | + |
| 171 | + /** IP rate: count connections from this IP within the last RATE_WINDOW_SECS. */ |
| 172 | + private int checkIpRate(final ConnectionEvent event, final List<String> findings) |
| 173 | + { |
| 174 | + long now = Instant.now().getEpochSecond(); |
| 175 | + long cutoff = now - RATE_WINDOW_SECS; |
| 176 | + |
| 177 | + List<Long> times = ipTimestamps.computeIfAbsent(event.ip, k -> new ArrayList<>()); |
| 178 | + synchronized (times) |
| 179 | + { |
| 180 | + times.removeIf(t -> t < cutoff); |
| 181 | + int count = times.size(); |
| 182 | + |
| 183 | + if (count >= RATE_LIMIT) |
| 184 | + { |
| 185 | + findings.add("WARN IP " + event.ip + " made " + count + " connections in the last " |
| 186 | + + RATE_WINDOW_SECS + "s (threshold=" + RATE_LIMIT + ") — rate limited"); |
| 187 | + return 40; |
| 188 | + } |
| 189 | + else if (count >= RATE_LIMIT / 2) |
| 190 | + { |
| 191 | + findings.add("INFO IP " + event.ip + " connection count approaching limit (" + count + "/" + RATE_LIMIT + ")"); |
| 192 | + return 15; |
| 193 | + } |
| 194 | + } |
| 195 | + findings.add("PASS IP " + event.ip + " within connection rate"); |
| 196 | + return 0; |
| 197 | + } |
| 198 | + |
| 199 | + /** Port scan: same IP connecting to multiple distinct ports. */ |
| 200 | + private int checkPortScan(final ConnectionEvent event, final List<String> findings) |
| 201 | + { |
| 202 | + Set<Integer> ports = ipPorts.computeIfAbsent(event.ip, k -> ConcurrentHashMap.newKeySet()); |
| 203 | + ports.add(event.port); |
| 204 | + int distinct = ports.size(); |
| 205 | + |
| 206 | + if (distinct >= PORT_SCAN_THRESHOLD) |
| 207 | + { |
| 208 | + findings.add("WARN IP " + event.ip + " has probed " + distinct + " distinct ports " + ports |
| 209 | + + " — possible port scan"); |
| 210 | + return 30; |
| 211 | + } |
| 212 | + findings.add("PASS IP " + event.ip + " port probe count normal (" + distinct + ")"); |
| 213 | + return 0; |
| 214 | + } |
| 215 | + |
| 216 | + /** Geo concentration: flag when one country makes up > GEO_CONCENTRATION % of all connections. */ |
| 217 | + private int checkGeoConcentration(final ConnectionEvent event, final List<String> findings) |
| 218 | + { |
| 219 | + if (event.countryCode == null || event.countryCode.isBlank()) |
| 220 | + { |
| 221 | + findings.add("INFO no geo-location data available for " + event.ip); |
| 222 | + return 0; |
| 223 | + } |
| 224 | + |
| 225 | + int total = totalConnections + 1; // +1 for current event |
| 226 | + int fromCountry = countryCount.getOrDefault(event.countryCode, 0) + 1; |
| 227 | + int pct = (fromCountry * 100) / total; |
| 228 | + |
| 229 | + if (pct >= GEO_CONCENTRATION && total > 5) // require minimum sample |
| 230 | + { |
| 231 | + findings.add("WARN " + pct + "% of connections originate from " + event.countryCode |
| 232 | + + " (" + fromCountry + "/" + total + ") — geo concentration flag"); |
| 233 | + return 20; |
| 234 | + } |
| 235 | + findings.add("PASS geo distribution normal for " + event.countryCode + " (" + pct + "%)"); |
| 236 | + return 0; |
| 237 | + } |
| 238 | + |
| 239 | + /** Payload scan: flag known dangerous keywords in the request payload. */ |
| 240 | + private static final List<String> BAD_KEYWORDS = List.of( |
| 241 | + "exec(", "Runtime.getRuntime", "ProcessBuilder", "../", "passwd", "shadow", |
| 242 | + "<script>", "SELECT ", "DROP TABLE", "UNION SELECT" |
| 243 | + ); |
| 244 | + |
| 245 | + private int checkPayload(final ConnectionEvent event, final List<String> findings) |
| 246 | + { |
| 247 | + if (event.payload == null || event.payload.isBlank()) |
| 248 | + { |
| 249 | + findings.add("INFO no payload to inspect for " + event.ip); |
| 250 | + return 0; |
| 251 | + } |
| 252 | + |
| 253 | + String lower = event.payload.toLowerCase(); |
| 254 | + int penalty = 0; |
| 255 | + for (String kw : BAD_KEYWORDS) |
| 256 | + { |
| 257 | + if (lower.contains(kw.toLowerCase())) |
| 258 | + { |
| 259 | + findings.add("WARN payload contains flagged keyword: [" + kw + "]"); |
| 260 | + penalty += 15; |
| 261 | + } |
| 262 | + } |
| 263 | + if (penalty == 0) findings.add("PASS payload keyword scan clean"); |
| 264 | + return penalty; |
| 265 | + } |
| 266 | + |
| 267 | + /** Update tracking structures after a connection has been classified. */ |
| 268 | + private void recordConnection(final ConnectionEvent event) |
| 269 | + { |
| 270 | + long now = Instant.now().getEpochSecond(); |
| 271 | + ipTimestamps.computeIfAbsent(event.ip, k -> new ArrayList<>()).add(now); |
| 272 | + totalConnections++; |
| 273 | + if (event.countryCode != null && !event.countryCode.isBlank()) |
| 274 | + countryCount.merge(event.countryCode, 1, Integer::sum); |
| 275 | + } |
| 276 | + |
| 277 | + // ───────────────────────────────────────────────────────────────────────── |
| 278 | + // Data types |
| 279 | + // ───────────────────────────────────────────────────────────────────────── |
| 280 | + |
| 281 | + /** |
| 282 | + * Describes a single inbound connection event on any public port. |
| 283 | + * Build with the constructor or the fluent Builder. |
| 284 | + */ |
| 285 | + public static final class ConnectionEvent |
| 286 | + { |
| 287 | + public final String ip; |
| 288 | + public final int port; |
| 289 | + public final String countryCode; // ISO-3166 alpha-2, e.g. "US", "CN" — may be null |
| 290 | + public final String payload; // raw request snippet — may be null |
| 291 | + |
| 292 | + public ConnectionEvent(final String ip, final int port, final String countryCode, final String payload) |
| 293 | + { |
| 294 | + this.ip = Objects.requireNonNull(ip, "ip"); |
| 295 | + this.port = port; |
| 296 | + this.countryCode = countryCode; |
| 297 | + this.payload = payload; |
| 298 | + } |
| 299 | + |
| 300 | + /** Fluent builder for ConnectionEvent. */ |
| 301 | + public static final class Builder |
| 302 | + { |
| 303 | + private String ip; |
| 304 | + private int port; |
| 305 | + private String countryCode; |
| 306 | + private String payload; |
| 307 | + |
| 308 | + public Builder ip(final String ip) { this.ip = ip; return this; } |
| 309 | + public Builder port(final int port) { this.port = port; return this; } |
| 310 | + public Builder countryCode(final String code) { this.countryCode = code; return this; } |
| 311 | + public Builder payload(final String payload) { this.payload = payload; return this; } |
| 312 | + public ConnectionEvent build() { return new ConnectionEvent(ip, port, countryCode, payload); } |
| 313 | + } |
| 314 | + } |
| 315 | + |
| 316 | + /** |
| 317 | + * Result of classifying a single connection event. |
| 318 | + * score 0–100: higher means greater threat likelihood. |
| 319 | + */ |
| 320 | + public static final class Classification |
| 321 | + { |
| 322 | + public static final int THREAT_THRESHOLD = 40; |
| 323 | + |
| 324 | + public final ConnectionEvent event; |
| 325 | + public final int score; |
| 326 | + public final List<String> findings; |
| 327 | + public final boolean threat; |
| 328 | + |
| 329 | + Classification(final ConnectionEvent event, final int score, final List<String> findings) |
| 330 | + { |
| 331 | + this.event = event; |
| 332 | + this.score = score; |
| 333 | + this.findings = findings; |
| 334 | + this.threat = score >= THREAT_THRESHOLD; |
| 335 | + } |
| 336 | + |
| 337 | + public String summary() |
| 338 | + { |
| 339 | + StringBuilder sb = new StringBuilder(); |
| 340 | + sb.append("HeuristicClassifier [IP=").append(event.ip) |
| 341 | + .append(" port=").append(event.port) |
| 342 | + .append(" country=").append(event.countryCode) |
| 343 | + .append("] score=").append(score).append("/100 — ") |
| 344 | + .append(threat ? "THREAT" : "CLEAR").append('\n'); |
| 345 | + for (String f : findings) sb.append(" ").append(f).append('\n'); |
| 346 | + return sb.toString().stripTrailing(); |
| 347 | + } |
| 348 | + } |
| 349 | +} |
0 commit comments