// services/content-safety.service.ts
import { Injectable, Logger, BadRequestException } from '@nestjs/common';
import {Language} from "../../../common/interfaces/language.prompt.interface";

/**
 * Outcome of a content-safety check.
 */
export interface SafetyCheckResult {
  /** `true` when the checked text may be used, `false` when it was blocked. */
  safe: boolean;
  /** Human-readable explanation of the block; expected only when `safe` is `false`. */
  blockReason?: string;
  /** Non-blocking advisories collected during the check (may be empty). */
  warnings: string[];
}

/**
 * Regex-based safety gate for content generation.
 *
 * `checkInput` screens the requested topic before generation, `checkOutput`
 * screens the generated text afterwards, and `assertSafe` wraps `checkInput`
 * for callers that prefer an exception over a result object.
 */
@Injectable()
export class ContentSafetyService {
  private readonly logger = new Logger(ContentSafetyService.name);

  /**
   * Hard blocks — never allowed, no matter which tone is enabled.
   *
   * NOTE(review): the word patterns have no trailing boundary, so they also
   * match longer words that merely start with the term (the slur pattern
   * `ret[a4]rd` hits "retardant", the threat pattern hits "shoot heroes").
   * Tighten them only after deciding which inflections must stay blocked.
   */
  private readonly HARD_BLOCK_PATTERNS: readonly RegExp[] = [
    // Slurs (masked sample list; fill in the real list for production)
    /\bn[i1]gg[ae]r/i,
    /\bf[a4]gg[o0]t/i,
    /\btr[a4]nn[yi]/i,
    /\bret[a4]rd/i,
    // Threats
    /\b(kill|murder|shoot|behead|stab)\s+(you|him|her|them)/i,
    // CSAM-related (zero tolerance)
    /\b(child|minor|kid|underage)\s+(porn|sex|nude)/i,
    // Doxxing patterns
    /\b\d{3}-\d{2}-\d{4}\b/, // SSN format
  ];

  /**
   * Matches `@handle` mentions — these are logged as warnings, never blocked.
   *
   * Only used through `String.prototype.match`, which resets `lastIndex` for
   * global regexes, so sharing a single instance across calls is safe.
   */
  private readonly MENTION_PATTERN = /@\w+/g;

  /** Output with more f-words than this gets a profanity-density warning. */
  private readonly PROFANITY_WARNING_THRESHOLD = 5;

  /**
   * Check an input topic — should content be generated for it at all?
   *
   * @param topic     Raw topic text supplied by the caller.
   * @param allowEdgy When `true`, `@mentions` in the topic produce a warning.
   * @returns `safe: false` with a `blockReason` if any hard-block pattern
   *          matches; otherwise `safe: true` plus any warnings.
   */
  checkInput(topic: string, allowEdgy: boolean): SafetyCheckResult {
    const warnings: string[] = [];

    if (this.containsHardBlock(topic)) {
      return {
        safe: false,
        blockReason: 'Input contains prohibited content (hate speech, threats, or illegal content)',
        warnings,
      };
    }

    // Mentions only matter for edgy tones, where a named user could become the target.
    const mentions = allowEdgy ? topic.match(this.MENTION_PATTERN) : null;
    if (mentions && mentions.length > 0) {
      warnings.push(
        `Topic mentions specific users (${mentions.join(', ')}). Edgy tones target IDEAS not PEOPLE. Use carefully.`,
      );
    }

    return { safe: true, warnings };
  }

  /**
   * Check generated output — is the content the AI produced safe to return?
   *
   * @param content  Generated text to screen.
   * @param language Accepted for interface compatibility; the current
   *                 implementation does not read it.
   * @returns `safe: false` with a `blockReason` if any hard-block pattern
   *          matches; otherwise `safe: true`, with a warning when the text
   *          contains more than five f-words.
   */
  checkOutput(content: string, language: Language): SafetyCheckResult {
    const warnings: string[] = [];

    if (this.containsHardBlock(content)) {
      // NOTE(review): the second call writes the blocked text itself (which may
      // contain an SSN-shaped string) to the error log — confirm that is acceptable.
      this.logger.error(`checkOutput ->FALSE -> Output contains prohibited language`);
      this.logger.error({ content });
      return {
        safe: false,
        blockReason: 'Output contains prohibited language',
        warnings,
      };
    }

    // Excessive-aggression heuristic: count case-insensitive occurrences of the f-word.
    const fuckCount = content.match(/fuck/gi)?.length ?? 0;
    if (fuckCount > this.PROFANITY_WARNING_THRESHOLD) {
      warnings.push(`Very high profanity density (${fuckCount} f-words). Consider lower intensity.`);
    }

    return { safe: true, warnings };
  }

  /**
   * Throwing variant of `checkInput` (intended for controllers).
   *
   * @param topic     Raw topic text supplied by the caller.
   * @param allowEdgy Forwarded to `checkInput`.
   * @throws BadRequestException carrying the block reason when the topic is unsafe.
   */
  assertSafe(topic: string, allowEdgy: boolean): void {
    const result = this.checkInput(topic, allowEdgy);
    if (!result.safe) {
      throw new BadRequestException(result.blockReason);
    }
    if (result.warnings.length > 0) {
      this.logger.warn(`Safety warnings: ${result.warnings.join('; ')}`);
    }
  }

  /** Returns `true` when `text` matches at least one hard-block pattern. */
  private containsHardBlock(text: string): boolean {
    return this.HARD_BLOCK_PATTERNS.some((pattern) => pattern.test(text));
  }
}