Products

SIGN UP | LOG IN

Text Moderation / Guides

Text Moderation - Quick start guide

Rule-based Text Moderation with Sightengine

Simply send a POST request containing the UTF-8 encoded text along with the ISO 639-1 language code (such as en for English) and the comma-separated list of countries for phone number detection (such as us,gb,fr for the United States, the United Kingdom and France). Here is an example:


curl -X POST 'https://api.sightengine.com/1.0/text/check.json' \
  -F 'text=Contact rick(at)gmail(dot)com to have s_*_x' \
  -F 'lang=en' \
  -F 'opt_countries=us,gb,fr' \
  -F 'mode=rules' \
  -F 'api_user={api_user}' \
  -F 'api_secret={api_secret}'


# this example uses requests
import requests
import json

data = {
  'text': 'Contact rick(at)gmail(dot)com to have s_*_x',
  'mode': 'rules',
  'lang': 'en',
  'opt_countries': 'us,gb,fr',
  'api_user': '{api_user}',
  'api_secret': '{api_secret}'
}
r = requests.post('https://api.sightengine.com/1.0/text/check.json', data=data)

output = json.loads(r.text)


$params = array(
  'text' => 'Contact rick(at)gmail(dot)com to have s_*_x',
  'lang' => 'en',
  'opt_countries' => 'us,gb,fr',
  'mode' => 'rules',
  'api_user' => '{api_user}',
  'api_secret' => '{api_secret}',
);

// this example uses cURL
$ch = curl_init('https://api.sightengine.com/1.0/text/check.json');
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $params);
$response = curl_exec($ch);
curl_close($ch);

$output = json_decode($response, true);


// this example uses axios and form-data
const axios = require('axios');
const FormData = require('form-data');

data = new FormData();
data.append('text', 'Contact rick(at)gmail(dot)com to have s_*_x');
data.append('lang', 'en');
data.append('opt_countries', 'us,gb,fr');
data.append('mode', 'rules');
data.append('api_user', '{api_user}');
data.append('api_secret', '{api_secret}');

axios({
  url: 'https://api.sightengine.com/1.0/text/check.json',
  method:'post',
  data: data,
  headers: data.getHeaders()
})
.then(function (response) {
  // on success: handle response
  console.log(response.data);
})
.catch(function (error) {
  // handle error
  if (error.response) console.log(error.response.data);
  else console.log(error.message);
});

The response contains a JSON body that you can use to determine whether the text is acceptable.

Objectionable content is categorized into a few main categories, with each category having different types (subcategories) that you can use to determine what was found:

Category | Types
profanity
  • sexual for sexual content
  • discriminatory for discriminatory and derogatory content
  • insult for insults
  • grawlix for grawlix (strings of typographical symbols standing in for profanity, such as #@$%!)
  • inappropriate for inappropriate language
  • other_profanity for other types of profanity
Learn more
personal
  • email for email addresses
  • phone_number_** for phone numbers, where the last two characters are the country code for the matching country
  • username for usernames
  • ipv4 for IPs (version 4)
  • ipv6 for IPs (version 6)
  • ssn for US social security numbers
Learn more
link
  • url for URLs
Learn more

As an example, here is the JSON response that you would receive for the above request. The message contains a sexual term along with a slightly obfuscated email address, both of which are detected and returned:


{
  "status": "success",
  "request": {
    "id": "req_6cujQglQPgGApjI5odv0P",
    "timestamp": 1471947033.92,
    "operations": 1
  },
  "profanity": {
    "matches": [
      {
        "type": "sexual",
        "match": "sx",
        "intensity": "medium",
        "start": 38,
        "end": 42
      }
    ]
  },
  "personal": {
    "matches": [
      {
        "type": "email",
        "match": "rick(at)gmail(dot)com",
        "start": 8,
        "end": 28
      }
    ]
  },
  "link": {
    "matches": []
  }
}

You can also create your own custom disallow list to filter custom text items, in addition to the words that we detect out-of-the-box. More on custom disallow lists

Moderating Usernames with Sightengine

Simply send a POST request containing the UTF-8 encoded username along with the ISO 639-1 language code (such as en for English). Here is an example:


curl -X POST 'https://api.sightengine.com/1.0/text/check.json' \
  -F 'text=urCr0tch' \
  -F 'lang=en' \
  -F 'mode=username' \
  -F 'api_user={api_user}' \
  -F 'api_secret={api_secret}'


# this example uses requests
import requests
import json

data = {
  'text': 'urCr0tch',
  'mode': 'username',
  'lang': 'en',
  'api_user': '{api_user}',
  'api_secret': '{api_secret}'
}
r = requests.post('https://api.sightengine.com/1.0/text/check.json', data=data)

output = json.loads(r.text)


$params = array(
  'text' => 'urCr0tch',
  'lang' => 'en',
  'mode' => 'username',
  'api_user' => '{api_user}',
  'api_secret' => '{api_secret}',
);

// this example uses cURL
$ch = curl_init('https://api.sightengine.com/1.0/text/check.json');
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $params);
$response = curl_exec($ch);
curl_close($ch);

$output = json_decode($response, true);


// this example uses axios and form-data
const axios = require('axios');
const FormData = require('form-data');

data = new FormData();
data.append('text', 'urCr0tch');
data.append('lang', 'en');
data.append('mode', 'username');
data.append('api_user', '{api_user}');
data.append('api_secret', '{api_secret}');

axios({
  url: 'https://api.sightengine.com/1.0/text/check.json',
  method:'post',
  data: data,
  headers: data.getHeaders()
})
.then(function (response) {
  // on success: handle response
  console.log(response.data);
})
.catch(function (error) {
  // handle error
  if (error.response) console.log(error.response.data);
  else console.log(error.message);
});

The response contains a JSON body that you can use to determine whether the username is acceptable.

Objectionable content is categorized into a few main categories, with each category having different types (subcategories) that you can use to determine what was found:

Category | Types
profanity
  • sexual for sexual content
  • discriminatory for discriminatory and derogatory content
  • insult for insults
  • grawlix for grawlix (strings of typographical symbols standing in for profanity, such as #@$%!)
  • inappropriate for inappropriate language
  • other_profanity for other types of profanity
Learn more
misleading
  • misleading for usernames that might be misleading when viewed by other users.
Learn more
personal
  • email for email addresses
  • phone_number_** for phone numbers, where the last two characters are the country code for the matching country
  • ipv4 for IPs (version 4)
  • ipv6 for IPs (version 6)
  • ssn for US social security numbers
Learn more

As an example, here is the JSON response that you would receive for the above request. The username contains an obfuscated sexual term, which is detected and returned:


{
  "status": "success",
  "request": {
    "id": "req_6cujQglQPgGApjI5odv0P",
    "timestamp": 1471947033.92,
    "operations": 1
  },
  "profanity": {
    "matches": [
      {
        "type": "sexual",
        "match": "crotch",
        "intensity": "medium",
        "start": 0,
        "end": 7
      }
    ]
  },
  "personal": {
    "matches": []
  },
  "link": {
    "matches": []
  },
  "misleading": {
    "matches": []
  }
}

Was this page helpful?