Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,22 @@ import (
func main() {
goaway.IsProfane("fuck this shit") // returns true
goaway.ExtractProfanity("fuck this shit") // returns "fuck"
goaway.ExtractAllProfanities("fuck this shit") // returns ["fuck", "shit"]
goaway.Censor("fuck this shit") // returns "**** this ****"

goaway.IsProfane("F u C k th1$ $h!t") // returns true
goaway.ExtractProfanity("F u C k th1$ $h!t") // returns "fuck"
goaway.ExtractAllProfanities("F u C k th1$ $h!t") // returns ["fuck", "shit"]
goaway.Censor("F u C k th1$ $h!t") // returns "* * * * th1$ ****"

goaway.IsProfane("@$$h073") // returns true
goaway.ExtractProfanity("@$$h073") // returns "asshole"
goaway.ExtractAllProfanities("@$$h073") // returns ["ass", "asshole"]
goaway.Censor("@$$h073") // returns "*******"

goaway.IsProfane("hello, world!") // returns false
goaway.ExtractProfanity("hello, world!") // returns ""
goaway.ExtractAllProfanities("hello, world!") // returns []
goaway.Censor("hello, world!") // returns "hello, world!"

buf := &bytes.Buffer{}
Expand All @@ -52,12 +56,13 @@ func main() {
}
```

Calling `goaway.IsProfane(s)`, `goaway.ExtractProfanity(s)` or `goaway.Censor(s)` will use the default profanity detector,
Calling `goaway.IsProfane(s)`, `goaway.ExtractProfanity(s)`, `goaway.ExtractAllProfanities(s)` or `goaway.Censor(s)` will use the default profanity detector,
but if you'd like to disable leet speak, numerical character or special character sanitization, you have to create a
ProfanityDetector instead:
```go
// Build a dedicated detector with leet speak, special character and accent sanitization switched off.
profanityDetector := goaway.NewProfanityDetector().WithSanitizeLeetSpeak(false).WithSanitizeSpecialCharacters(false).WithSanitizeAccents(false)
profanityDetector.IsProfane("b!tch") // returns false because we're not sanitizing special characters
profanityDetector.ExtractAllProfanities("fuck this shit") // returns ["fuck", "shit"]
```
You can also disable the default behavior of white space sanitization like so:
```go
Expand Down
54 changes: 54 additions & 0 deletions goaway.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,48 @@ func (g *ProfanityDetector) ExtractProfanity(s string) string {
return ""
}

// ExtractAllProfanities takes in a string (word or sentence) and looks for profanities.
// Returns all profanities found as a slice of strings, or an empty (non-nil) slice if
// none are found. Duplicates are removed from the result.
//
// The result is ordered by discovery, so repeated calls with the same input and the
// same detector yield the same slice:
//  1. false negatives contained in the sanitized input, in the order of g.falseNegatives;
//  2. then, in exact-word mode, matching tokens in the order they appear in the
//     sanitized input; otherwise, matching profanities in the order of g.profanities.
func (g *ProfanityDetector) ExtractAllProfanities(s string) []string {
	// Only the sanitized text is needed; the second return value is discarded.
	s, _ = g.sanitize(s, false)

	// found keeps discovery order; seen guards against duplicates. Ranging over a
	// map to build the result would make the order differ from call to call.
	found := []string{}
	seen := make(map[string]struct{})
	record := func(word string) {
		if _, dup := seen[word]; dup {
			return
		}
		seen[word] = struct{}{}
		found = append(found, word)
	}

	// Check for false negatives before false positives are stripped, so that a
	// false negative overlapping a false positive is still reported.
	for _, word := range g.falseNegatives {
		if strings.Contains(s, word) {
			record(word)
		}
	}
	// Remove false positives so they cannot trigger the profanity checks below.
	for _, word := range g.falsePositives {
		s = strings.Replace(s, word, "", -1)
	}

	if g.exactWord {
		// Whole-token matching: each token is recorded as it appears in the
		// sanitized input when sliceContains reports it as a known profanity.
		for _, token := range strings.Split(s, space) {
			if sliceContains(g.profanities, token) {
				record(token)
			}
		}
		return found
	}

	// Substring matching: every profanity contained anywhere in the text counts.
	for _, word := range g.profanities {
		if strings.Contains(s, word) {
			record(word)
		}
	}
	return found
}

func sliceContains(words []string, s string) bool {
for _, word := range words {
if strings.EqualFold(s, word) {
Expand Down Expand Up @@ -353,6 +395,18 @@ func ExtractProfanity(s string) string {
return defaultProfanityDetector.ExtractProfanity(s)
}

// ExtractAllProfanities scans a string (word or sentence) for profanities and
// returns every distinct profanity found, or an empty slice when there are none.
//
// Uses the default ProfanityDetector, which is created on first use if it has
// not been initialized yet.
func ExtractAllProfanities(s string) []string {
	detector := defaultProfanityDetector
	if detector == nil {
		// Lazily build the shared default detector and keep it for later calls.
		detector = NewProfanityDetector()
		defaultProfanityDetector = detector
	}
	return detector.ExtractAllProfanities(s)
}

// Censor takes in a string (word or sentence) and tries to censor all profanities found.
//
// Uses the default ProfanityDetector
Expand Down
97 changes: 97 additions & 0 deletions goaway_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package goaway

import (
"sort"
"testing"
)

Expand Down Expand Up @@ -37,6 +38,102 @@ func TestExtractProfanity(t *testing.T) {
}
}

// TestExtractAllProfanities checks the package-level ExtractAllProfanities helper
// against plain, obfuscated, clean and repeated-word inputs. Results are compared
// without regard to order.
func TestExtractAllProfanities(t *testing.T) {
	// Reset the shared detector so the helper builds a fresh default one.
	defaultProfanityDetector = nil

	type testCase struct {
		in   string
		want []string
	}
	cases := []testCase{
		{in: "fuck this shit", want: []string{"fuck", "shit"}},
		{in: "F u C k th1$ $h!t", want: []string{"fuck", "shit"}},
		{in: "@$$h073", want: []string{"ass", "asshole"}},
		{in: "hello, world!", want: []string{}},
		// A repeated profanity must be reported only once.
		{in: "fuck shit fuck", want: []string{"fuck", "shit"}},
		{in: "one penis, two vaginas, three dicks", want: []string{"penis", "vagina", "dick"}},
	}

	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			got := ExtractAllProfanities(tc.in)
			if equalStringSlices(got, tc.want) {
				return
			}
			t.Errorf("expected %v, got %v", tc.want, got)
		})
	}
}

// TestProfanityDetector_ExtractAllProfanities checks how the exact-word setting
// changes what ExtractAllProfanities reports on explicitly configured detectors.
// Results are compared without regard to order.
func TestProfanityDetector_ExtractAllProfanities(t *testing.T) {
	type testCase struct {
		name     string
		detector *ProfanityDetector
		in       string
		want     []string
	}
	cases := []testCase{
		{
			name:     "with exact word matching",
			detector: NewProfanityDetector().WithExactWord(true),
			in:       "fuck shit",
			want:     []string{"fuck", "shit"},
		},
		{
			// A profanity embedded in a longer token must not match in exact-word mode.
			name:     "with exact word matching - no match for substring",
			detector: NewProfanityDetector().WithExactWord(true),
			in:       "fuckthis",
			want:     []string{},
		},
		{
			name:     "without exact word matching - matches substring",
			detector: NewProfanityDetector().WithExactWord(false),
			in:       "fuckthis",
			want:     []string{"fuck"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.detector.ExtractAllProfanities(tc.in)
			if equalStringSlices(got, tc.want) {
				return
			}
			t.Errorf("expected %v, got %v", tc.want, got)
		})
	}
}

// equalStringSlices reports whether a and b hold the same strings the same number
// of times, ignoring order. A nil slice and an empty slice compare as equal.
// Neither argument is modified.
func equalStringSlices(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}

	// Sort private copies so the callers' slices keep their original order.
	left := append([]string(nil), a...)
	right := append([]string(nil), b...)
	sort.Strings(left)
	sort.Strings(right)

	for i, want := range right {
		if left[i] != want {
			return false
		}
	}
	return true
}

func TestProfanityDetector_Censor(t *testing.T) {
defaultProfanityDetector = nil
profanityDetectorWithSanitizeSpaceDisabled := NewProfanityDetector().WithSanitizeSpaces(false)
Expand Down