Handling of person, organisation and object identifiers. This module contains a collection of validation
and formatting tools for identifiers. The IdentifierFormat class is the base class for all identifier
formats, and it provides a common interface for validation and formatting.
Currently, identifiers can be accessed using short aliases, such as "imo" or "isin". In the future, we will
need to introduce a proper, structured identification scheme for identifiers, with qualifiers for country
(e.g. ru:nalog:inn, us:sam:uei).
BIC
Bases: IdentifierFormat
BIC (ISO 9362 Business identifier codes).
Source code in rigour/ids/stdnum_.py
class BIC(IdentifierFormat):
    """BIC (ISO 9362 Business identifier codes)."""

    TITLE = "BIC"
    STRONG: bool = True

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether ``bic.is_valid`` accepts ``value``."""
        return bic.is_valid(value)

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the upper-cased first eight characters of the compacted BIC.

        ``None`` is returned when validation raises ``ValidationError`` or
        when the shortened code no longer passes ``is_valid``.
        """
        try:
            compacted = bic.compact(bic.validate(value))
            # Only the leading eight characters are kept, then re-checked.
            shortened = compacted[:8].upper()
            return shortened if cls.is_valid(shortened) else None
        except ValidationError:
            return None

    @classmethod
    def format(cls, value: str) -> str:
        """Delegate presentation formatting to ``bic.format``."""
        return bic.format(value)
CPF
Bases: IdentifierFormat
Cadastro de Pessoas Físicas, Brazilian national identifier
Source code in rigour/ids/stdnum_.py
class CPF(IdentifierFormat):
    """Cadastro de Pessoas Físicas, Brazilian national identifier"""

    TITLE = "CPF"

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether ``cpf.is_valid`` accepts ``value``."""
        return cpf.is_valid(value)

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the validated, compacted CPF, or ``None`` if validation fails."""
        try:
            validated = cpf.validate(value)
            return cpf.compact(validated)
        except ValidationError:
            return None

    @classmethod
    def format(cls, value: str) -> str:
        """Delegate presentation formatting to ``cpf.format``."""
        return cpf.format(value)
FIGI
Bases: IdentifierFormat
A FIGI number for a security, as managed by OpenFIGI.
Source code in rigour/ids/stdnum_.py
class FIGI(IdentifierFormat):
    """A FIGI number for a security, as managed by OpenFIGI."""

    TITLE = "FIGI"
    STRONG: bool = True
    # Handle on the underlying validation module; not read within this class.
    impl = figi

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether ``figi.is_valid`` accepts ``value``."""
        return figi.is_valid(value)

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the validated, compacted FIGI, or ``None`` if validation fails."""
        try:
            validated = figi.validate(value)
            return figi.compact(validated)
        except ValidationError:
            return None
Bases: TypedDict
An identifier format specification.
Source code in rigour/ids/__init__.py
class FormatSpec(TypedDict):
    """An identifier format specification."""

    # Human-readable title of the format.
    title: str
    # Every alias under which the format can be looked up.
    names: List[str]
    # Free-text description of the format.
    description: str
IBAN
Bases: IdentifierFormat
An IBAN number for a bank account.
Source code in rigour/ids/stdnum_.py
class IBAN(IdentifierFormat):
    """An IBAN number for a bank account."""

    TITLE = "IBAN"
    STRONG: bool = True

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether ``iban.is_valid`` accepts ``value``."""
        return iban.is_valid(value)

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the validated, compacted IBAN, or ``None`` if validation fails."""
        try:
            validated = iban.validate(value)
            return iban.compact(validated)
        except ValidationError:
            return None

    @classmethod
    def format(cls, value: str) -> str:
        """Delegate presentation formatting to ``iban.format``."""
        return iban.format(value)
IMO
Bases: IdentifierFormat
An IMO number for a ship or shipping company
Source code in rigour/ids/imo.py
class IMO(IdentifierFormat):
    """An IMO number for a ship or shipping company"""

    TITLE = "IMO"
    STRONG: bool = True

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Determine if the given string is a valid IMO number.

        The digits captured by group 2 of ``IMO_RE`` are accepted when their
        final digit equals either the vessel or the company check digit.
        """
        match = IMO_RE.search(text)
        if match is None:
            return False
        digits = [int(d) for d in match.group(2)]
        check_digit = digits[-1]
        # Vessel scheme: weights count down from 7 across the leading digits.
        vessel = sum(d * (7 - i) for i, d in enumerate(digits[:-1])) % 10
        if vessel == check_digit:
            return True
        # Company scheme: fixed weights over the first six digits, modulo 11.
        company = digits[0] * 8 + digits[1] * 6 + digits[2] * 4
        company += digits[3] * 2 + digits[4] * 9 + digits[5] * 7
        company = (11 - (company % 11)) % 10
        return company == check_digit

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Normalize the given string to a valid IMO number.

        Returns the digits found by ``IMO_RE`` prefixed with ``IMO``, or
        ``None`` when nothing matches or the check digit fails.
        """
        match = IMO_RE.search(text)
        if match is None:
            return None
        value = match.group(2)
        if cls.is_valid(value):
            return f"IMO{value}"
        return None

    @classmethod
    def format(cls, value: str) -> str:
        """Strip spaces and make sure the value carries the ``IMO`` prefix."""
        value = value.replace(" ", "")
        if not value.startswith("IMO"):
            value = f"IMO{value}"
        return value
is_valid(text)
classmethod
Determine if the given string is a valid IMO number.
Source code in rigour/ids/imo.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Check whether ``text`` contains an IMO number with a correct check digit.

    Both the vessel and the company check-digit schemes are tried; passing
    either one is sufficient.
    """
    found = IMO_RE.search(text)
    if found is None:
        return False
    numbers = [int(char) for char in found.group(2)]
    expected = numbers[-1]
    # Vessel numbers: weights count down from 7 over all but the last digit.
    vessel_sum = 0
    for position, number in enumerate(numbers[:-1]):
        vessel_sum += number * (7 - position)
    if vessel_sum % 10 == expected:
        return True
    # Company numbers: fixed weights over the first six digits, modulo 11.
    company_weights = (8, 6, 4, 2, 9, 7)
    company_sum = sum(
        weight * numbers[position]
        for position, weight in enumerate(company_weights)
    )
    return (11 - (company_sum % 11)) % 10 == expected
normalize(text)
classmethod
Normalize the given string to a valid IMO number.
Source code in rigour/ids/imo.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Normalize the given string to a valid IMO number.

    Returns the digits found by ``IMO_RE`` prefixed with ``IMO``, or ``None``
    when nothing matches or the check digit fails.
    """
    match = IMO_RE.search(text)
    if match is None:
        return None
    value = match.group(2)
    if cls.is_valid(value):
        return f"IMO{value}"
    return None
INN
Bases: IdentifierFormat
Russian tax identification number.
Source code in rigour/ids/stdnum_.py
class INN(IdentifierFormat):
    """Russian tax identification number."""

    TITLE = "INN"

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether ``inn.is_valid`` accepts ``value``."""
        return inn.is_valid(value)

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the validated, compacted INN, or ``None`` if validation fails."""
        try:
            validated = inn.validate(value)
            return inn.compact(validated)
        except ValidationError:
            return None
ISIN
Bases: IdentifierFormat
An ISIN number for a security.
Source code in rigour/ids/stdnum_.py
class ISIN(IdentifierFormat):
    """An ISIN number for a security."""

    TITLE = "ISIN"
    STRONG: bool = True

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether ``isin.is_valid`` accepts ``value``."""
        return isin.is_valid(value)

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the validated, compacted ISIN, or ``None`` if validation fails."""
        try:
            validated = isin.validate(value)
            return isin.compact(validated)
        except ValidationError:
            return None
Bases: object
Base class for identifier types.
Source code in rigour/ids/common.py
class IdentifierFormat:
    """Base class for identifier types."""

    TITLE: str = "Generic identifier"
    STRONG: bool = False

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Return True when ``normalize`` yields a non-empty string."""
        norm = cls.normalize(value)
        return norm is not None and len(norm) > 0

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Strip surrounding whitespace from ``value``.

        Returns ``None`` when nothing is left after stripping, so that
        ``normalize`` and ``is_valid`` agree on blank input instead of
        handing an empty string back to the caller.
        """
        stripped = value.strip()
        if not stripped:
            return None
        return stripped

    @classmethod
    def format(cls, value: str) -> str:
        """Return ``value`` upper-cased for presentation."""
        return value.upper()
LEI
Bases: IdentifierFormat
Legal Entity Identifier (ISO 17442)
Source code in rigour/ids/stdnum_.py
class LEI(IdentifierFormat):
    """Legal Entity Identifier (ISO 17442)"""

    TITLE = "LEI"
    STRONG: bool = True

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether ``lei.is_valid`` accepts ``value``."""
        return lei.is_valid(value)

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the validated, compacted LEI, or ``None`` if validation fails."""
        try:
            validated = lei.validate(value)
            return lei.compact(validated)
        except ValidationError:
            return None
NPI
Bases: IdentifierFormat
National Provider Identifier.
Source code in rigour/ids/npi.py
class NPI(IdentifierFormat):
    """National Provider Identifier."""

    TITLE: str = "NPI"
    STRONG: bool = True

    # cf. https://www.johndcook.com/blog/2024/06/26/npi-number/

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Check ``text`` against the NPI pattern, deny-list and Luhn checksum."""
        if NPI_RE.match(text) is None or text in INVALID:
            return False
        # Ten-character values get the "80840" prefix before the Luhn check.
        candidate = "80840" + text if len(text) == 10 else text
        return bool(luhn.is_valid(candidate))

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Extract the first NPI found in ``text``, or ``None`` if none is valid."""
        found = NPI_RE.search(text)
        if found is None:
            return None
        number = found.group(1)
        # The deny-list test repeats what is_valid already checks.
        if not cls.is_valid(number) or number in INVALID:
            return None
        return number
is_valid(text)
classmethod
Determine if the given string is a valid NPI.
Source code in rigour/ids/npi.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Check ``text`` against the NPI pattern, deny-list and Luhn checksum."""
    if NPI_RE.match(text) is None or text in INVALID:
        return False
    # Ten-character values get the "80840" prefix before the Luhn check.
    candidate = "80840" + text if len(text) == 10 else text
    return bool(luhn.is_valid(candidate))
normalize(text)
classmethod
Normalize the given string to a valid NPI.
Source code in rigour/ids/npi.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Extract the first NPI found in ``text``, or ``None`` if none is valid."""
    found = NPI_RE.search(text)
    if found is None:
        return None
    number = found.group(1)
    # The deny-list test repeats what is_valid already checks.
    if not cls.is_valid(number) or number in INVALID:
        return None
    return number
OGRN
Bases: IdentifierFormat
Primary State Registration Number (Russian company registration).
Source code in rigour/ids/ogrn.py
class OGRN(IdentifierFormat):
    """Primary State Registration Number (Russian company registration)."""

    TITLE: str = "OGRN"
    STRONG: bool = True

    # cf. https://docs.trellix.com/de-DE/bundle/data-loss-prevention-11.10.x-classification-definitions-reference-guide/page/GUID-945B4343-861E-4A57-8E60-8B6028871BA1.html

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Determine if the given string is a valid OGRN."""
        if OGRN_RE.match(text) is None:
            return False
        # Validate registration type: a leading zero is rejected.
        if text[0] == "0":
            return False
        # Validate control digit logic: the last digit must equal the computed one.
        control_digit = int(text[-1])
        return control_digit == cls.calculate_control_digit(text)

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Normalize the given string to a valid OGRN."""
        match = OGRN_RE.search(text)
        if match is None:
            return None
        value = match.group(1)
        if cls.is_valid(value):
            return value
        return None

    @classmethod
    def calculate_control_digit(cls, grn: str) -> Optional[int]:
        """Compute the expected control digit for a 13- or 15-digit number.

        The control digit is the last decimal digit of the remainder of the
        leading digits divided by 11 (13-digit numbers) or by 13 (15-digit
        numbers). Returns ``None`` for any other length.
        """
        if len(grn) == 13:
            return int(grn[:12]) % 11 % 10
        if len(grn) == 15:
            # The remainder modulo 13 may be 10, 11 or 12; all three must be
            # reduced to a single digit, not only 10.
            return int(grn[:14]) % 13 % 10
        return None
is_valid(text)
classmethod
Determine if the given string is a valid OGRN.
Source code in rigour/ids/ogrn.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Check the pattern, the leading digit and the control digit of ``text``."""
    # Reject anything the pattern refuses, and any value starting with zero.
    if OGRN_RE.match(text) is None or text[0] == "0":
        return False
    # The final digit must equal the computed control digit.
    expected = cls.calculate_control_digit(text)
    return int(text[-1]) == expected
normalize(text)
classmethod
Normalize the given string to a valid OGRN.
Source code in rigour/ids/ogrn.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Extract the first OGRN found in ``text``, or ``None`` if none is valid."""
    found = OGRN_RE.search(text)
    if found is None:
        return None
    number = found.group(1)
    return number if cls.is_valid(number) else None
SSN
Bases: IdentifierFormat
US Social Security Number
Source code in rigour/ids/stdnum_.py
class SSN(IdentifierFormat):
    """US Social Security Number"""

    TITLE = "SSN"
    STRONG: bool = False

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether ``ssn.is_valid`` accepts ``value``."""
        return ssn.is_valid(value)

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the validated, compacted SSN, or ``None`` if validation fails."""
        try:
            validated = ssn.validate(value)
            return ssn.compact(validated)
        except ValidationError:
            return None

    @classmethod
    def format(cls, value: str) -> str:
        """Delegate presentation formatting to ``ssn.format``."""
        return ssn.format(value)
Bases: IdentifierFormat
A generic identifier type that applies harsh normalization.
Source code in rigour/ids/strict.py
class StrictFormat(IdentifierFormat):
    """A generic identifier type that applies harsh normalization."""

    TITLE: str = "Strict identifier"

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Return True when the normalized form has more than two characters."""
        normalized = cls.normalize(value)
        if normalized is None:
            return False
        return len(normalized) > 2

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Reduce ``value`` to upper-cased alphanumeric ASCII characters.

        Returns ``None`` when ``ascii_text`` yields nothing or fewer than two
        characters.
        """
        # NOTE(review): the length guard runs before non-alphanumerics are
        # dropped and uses a different threshold than is_valid; confirm intended.
        latin = ascii_text(value)
        if latin is None or len(latin) < 2:
            return None
        return "".join(char for char in latin if char.isalnum()).upper()
UEI
Bases: IdentifierFormat
US GSA Unique Entity ID.
Source code in rigour/ids/uei.py
class UEI(IdentifierFormat):
    """US GSA Unique Entity ID."""

    # https://www.gsa.gov/about-us/organization/federal-acquisition-service/integrated-award-environment-iae/iae-systems-information-kit/uei-technical-specifications-and-api-information

    TITLE: str = "UEI"
    STRONG: bool = False
    """Marked false because the SAM database is massively duplicated, and entities in
    SAM conflate companies and their owners. This makes UEIs more like cluster IDs than
    unique entity identifiers."""

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Determine if the given string is a valid UEI.

        Only the pattern and the leading character are checked; no checksum
        is verified.
        """
        if UEI_RE.match(text) is None:
            return False
        if text.startswith("0"):
            return False
        # TODO: Figure out checksum scheme
        return True

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Normalize the given string to a valid UEI.

        Returns the first match of ``UEI_RE`` upper-cased, or ``None`` when
        nothing matches or the match is not valid.
        """
        match = UEI_RE.search(text)
        if match is None:
            return None
        value = match.group(1)
        if not cls.is_valid(value):
            return None
        return value.upper()

    @classmethod
    def format(cls, value: str) -> str:
        """Return ``value`` upper-cased for presentation."""
        return value.upper()
STRONG = False
class-attribute
instance-attribute
Marked false because the SAM database is massively duplicated, and entities in
SAM conflate companies and their owners. This makes UEIs more like cluster IDs than
unique entity identifiers.
is_valid(text)
classmethod
Determine if the given string is a valid UEI.
Source code in rigour/ids/uei.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Determine if the given string is a valid UEI.

    Only the pattern and the leading character are checked; no checksum is
    verified.
    """
    if UEI_RE.match(text) is None:
        return False
    if text.startswith("0"):
        return False
    # TODO: Figure out checksum scheme
    return True
normalize(text)
classmethod
Normalize the given string to a valid UEI.
Source code in rigour/ids/uei.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Normalize the given string to a valid UEI.

    Returns the first match of ``UEI_RE`` upper-cased, or ``None`` when
    nothing matches or the match is not valid.
    """
    match = UEI_RE.search(text)
    if match is None:
        return None
    value = match.group(1)
    if not cls.is_valid(value):
        return None
    return value.upper()
WikidataQID
Bases: IdentifierFormat
A wikidata item identifier.
Source code in rigour/ids/wikidata.py
class WikidataQID(IdentifierFormat):
    """A wikidata item identifier."""

    TITLE: str = "Wikidata QID"
    STRONG: bool = True

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Report whether ``is_qid`` accepts ``text``."""
        return is_qid(text)

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Reduce ``text`` to a bare, upper-cased QID.

        Everything up to the last ``/`` is dropped, then the remainder is
        stripped and upper-cased. ``None`` is returned when the result does
        not match the ``QID`` pattern.
        """
        tail = text.rsplit("/", 1)[-1]
        candidate = tail.strip().upper()
        return candidate if QID.match(candidate) is not None else None
is_valid(text)
classmethod
Determine if the given string is a valid wikidata QID.
Source code in rigour/ids/wikidata.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Report whether ``is_qid`` accepts ``text``."""
    accepted = is_qid(text)
    return accepted
normalize(text)
classmethod
Normalize the given string to a valid wikidata QID.
Source code in rigour/ids/wikidata.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Reduce ``text`` to a bare, upper-cased QID.

    Everything up to the last ``/`` is dropped, then the remainder is stripped
    and upper-cased. ``None`` is returned when the result does not match the
    ``QID`` pattern.
    """
    tail = text.rsplit("/", 1)[-1]
    candidate = tail.strip().upper()
    return candidate if QID.match(candidate) is not None else None
Get the identifier type class for the given format name.
Source code in rigour/ids/__init__.py
def get_identifier_format(name: str) -> Type[IdentifierFormat]:
    """Look up the identifier format class registered under ``name``.

    The lookup is a plain subscript on ``FORMATS``, so whatever that mapping
    raises for an unknown name propagates to the caller.
    """
    format_class = FORMATS[name]
    return format_class
Get a list of all identifier type names.
Source code in rigour/ids/__init__.py
def get_identifier_format_names() -> List[str]:
    """Return every name under which an identifier format is registered."""
    return [*FORMATS.keys()]
Get a list of all identifier formats.
Source code in rigour/ids/__init__.py
def get_identifier_formats() -> List[FormatSpec]:
    """Get a list of all identifier formats.

    Each registered class yields one ``FormatSpec`` carrying every name that
    maps to it, its ``TITLE`` and its class docstring. The result is sorted
    by title; classes sharing a title keep their ``FORMATS`` order, so the
    output is the same on every run.
    """
    # Group names per class in a single pass. A dict keeps first-seen order,
    # unlike iterating a set of the classes.
    names_by_type = {}
    for name, type_ in FORMATS.items():
        names_by_type.setdefault(type_, []).append(name)
    formats: List[FormatSpec] = []
    for type_, names in names_by_type.items():
        fmt: FormatSpec = {
            "names": names,
            "title": type_.TITLE,
            "description": type_.__doc__ or "",
        }
        formats.append(fmt)
    return sorted(formats, key=lambda f: f["title"])
Get a list of all strong identifier type names.
Source code in rigour/ids/__init__.py
@cache
def get_strong_format_names() -> List[str]:
    """Return the names of all formats whose class sets ``STRONG``.

    The function is wrapped in ``cache``, so the list is built on the first
    call only.
    """
    strong_names: List[str] = []
    for name, format_class in FORMATS.items():
        if format_class.STRONG:
            strong_names.append(name)
    return strong_names