diff --git a/README.md b/README.md index 9fe35e2..9e081e2 100644 --- a/README.md +++ b/README.md @@ -29,18 +29,22 @@ import ( func main() { goaway.IsProfane("fuck this shit") // returns true goaway.ExtractProfanity("fuck this shit") // returns "fuck" + goaway.ExtractAllProfanities("fuck this shit") // returns ["fuck", "shit"] goaway.Censor("fuck this shit") // returns "**** this ****" goaway.IsProfane("F u C k th1$ $h!t") // returns true goaway.ExtractProfanity("F u C k th1$ $h!t") // returns "fuck" + goaway.ExtractAllProfanities("F u C k th1$ $h!t") // returns ["fuck", "shit"] goaway.Censor("F u C k th1$ $h!t") // returns "* * * * th1$ ****" goaway.IsProfane("@$$h073") // returns true goaway.ExtractProfanity("@$$h073") // returns "asshole" + goaway.ExtractAllProfanities("@$$h073") // returns ["ass", "asshole"] goaway.Censor("@$$h073") // returns "*******" goaway.IsProfane("hello, world!") // returns false goaway.ExtractProfanity("hello, world!") // returns "" + goaway.ExtractAllProfanities("hello, world!") // returns [] goaway.Censor("hello, world!") // returns "hello, world!" 
buf := &bytes.Buffer{} @@ -52,12 +56,13 @@ func main() { } ``` -Calling `goaway.IsProfane(s)`, `goaway.ExtractProfanity(s)` or `goaway.Censor(s)` will use the default profanity detector, +Calling `goaway.IsProfane(s)`, `goaway.ExtractProfanity(s)`, `goaway.ExtractAllProfanities(s)` or `goaway.Censor(s)` will use the default profanity detector, but if you'd like to disable leet speak, numerical character or special character sanitization, you have to create a ProfanityDetector instead: ```go profanityDetector := goaway.NewProfanityDetector().WithSanitizeLeetSpeak(false).WithSanitizeSpecialCharacters(false).WithSanitizeAccents(false) profanityDetector.IsProfane("b!tch") // returns false because we're not sanitizing special characters +profanityDetector.ExtractAllProfanities("fuck this shit") // returns ["fuck", "shit"] ``` You can also disable the default behavior of white space sanitization like so: ```go diff --git a/goaway.go b/goaway.go index 54850a1..99583b5 100644 --- a/goaway.go +++ b/goaway.go @@ -161,6 +161,48 @@ func (g *ProfanityDetector) ExtractProfanity(s string) string { return "" } +// ExtractAllProfanities takes in a string (word or sentence) and looks for profanities. +// Returns all profanities found as a slice of strings, or an empty slice if none are found. +// Duplicates are removed from the result. 
+// Matches are returned in detection order: false negatives first, then profanities
+// in input-token order (exact-word mode) or in profanity-list order otherwise.
+func (g *ProfanityDetector) ExtractAllProfanities(s string) []string {
+	s, _ = g.sanitize(s, false)
+	// seen de-duplicates; found keeps insertion order, which ranging over a map would not.
+	seen := make(map[string]struct{})
+	found := make([]string, 0)
+	record := func(word string) {
+		if _, dup := seen[word]; !dup {
+			seen[word] = struct{}{}
+			found = append(found, word)
+		}
+	}
+	// False negatives are matched before false positives are stripped from s.
+	for _, word := range g.falseNegatives {
+		if strings.Contains(s, word) {
+			record(word)
+		}
+	}
+	for _, word := range g.falsePositives {
+		s = strings.Replace(s, word, "", -1)
+	}
+
+	if g.exactWord {
+		for _, token := range strings.Split(s, space) {
+			if sliceContains(g.profanities, token) {
+				record(token)
+			}
+		}
+	} else {
+		for _, word := range g.profanities {
+			if strings.Contains(s, word) {
+				record(word)
+			}
+		}
+	}
+	return found
+}
+
 func sliceContains(words []string, s string) bool {
 	for _, word := range words {
 		if strings.EqualFold(s, word) {
@@ -353,6 +395,18 @@ func ExtractProfanity(s string) string {
 	return defaultProfanityDetector.ExtractProfanity(s)
 }
 
+// ExtractAllProfanities takes in a string (word or sentence) and looks for profanities.
+// Returns all profanities found as a slice of strings, or an empty slice if none are found.
+// Duplicates are removed; matches are returned in detection order.
+//
+// Uses the default ProfanityDetector
+func ExtractAllProfanities(s string) []string {
+	if defaultProfanityDetector == nil {
+		defaultProfanityDetector = NewProfanityDetector()
+	}
+	return defaultProfanityDetector.ExtractAllProfanities(s)
+}
+
 // Censor takes in a string (word or sentence) and tries to censor all profanities found.
// // Uses the default ProfanityDetector diff --git a/goaway_test.go b/goaway_test.go index 4df76ef..6076503 100644 --- a/goaway_test.go +++ b/goaway_test.go @@ -1,6 +1,7 @@ package goaway import ( + "sort" "testing" ) @@ -37,6 +38,102 @@ func TestExtractProfanity(t *testing.T) { } } +func TestExtractAllProfanities(t *testing.T) { + defaultProfanityDetector = nil + tests := []struct { + input string + expectedProfanities []string + }{ + { + input: "fuck this shit", + expectedProfanities: []string{"fuck", "shit"}, + }, + { + input: "F u C k th1$ $h!t", + expectedProfanities: []string{"fuck", "shit"}, + }, + { + input: "@$$h073", + expectedProfanities: []string{"ass", "asshole"}, + }, + { + input: "hello, world!", + expectedProfanities: []string{}, + }, + { + input: "fuck shit fuck", + expectedProfanities: []string{"fuck", "shit"}, + }, + { + input: "one penis, two vaginas, three dicks", + expectedProfanities: []string{"penis", "vagina", "dick"}, + }, + } + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + profanities := ExtractAllProfanities(tt.input) + if !equalStringSlices(profanities, tt.expectedProfanities) { + t.Errorf("expected %v, got %v", tt.expectedProfanities, profanities) + } + }) + } +} + +func TestProfanityDetector_ExtractAllProfanities(t *testing.T) { + tests := []struct { + name string + detector *ProfanityDetector + input string + expectedProfanities []string + }{ + { + name: "with exact word matching", + detector: NewProfanityDetector().WithExactWord(true), + input: "fuck shit", + expectedProfanities: []string{"fuck", "shit"}, + }, + { + name: "with exact word matching - no match for substring", + detector: NewProfanityDetector().WithExactWord(true), + input: "fuckthis", + expectedProfanities: []string{}, + }, + { + name: "without exact word matching - matches substring", + detector: NewProfanityDetector().WithExactWord(false), + input: "fuckthis", + expectedProfanities: []string{"fuck"}, + }, + } + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + profanities := tt.detector.ExtractAllProfanities(tt.input) + if !equalStringSlices(profanities, tt.expectedProfanities) { + t.Errorf("expected %v, got %v", tt.expectedProfanities, profanities) + } + }) + } +} + +// equalStringSlices checks if two string slices contain the same elements (order doesn't matter) +func equalStringSlices(a, b []string) bool { + if len(a) != len(b) { + return false + } + aCopy := make([]string, len(a)) + bCopy := make([]string, len(b)) + copy(aCopy, a) + copy(bCopy, b) + sort.Strings(aCopy) + sort.Strings(bCopy) + for i := range aCopy { + if aCopy[i] != bCopy[i] { + return false + } + } + return true +} + func TestProfanityDetector_Censor(t *testing.T) { defaultProfanityDetector = nil profanityDetectorWithSanitizeSpaceDisabled := NewProfanityDetector().WithSanitizeSpaces(false)