Skip to content

Identifiers

rigour.ids

BIC

Bases: StdnumFormat

BIC (ISO 9362 Business identifier codes).

Source code in rigour/ids/stdnum_.py
class BIC(StdnumFormat):
    """BIC (ISO 9362 Business identifier codes)."""

    TITLE = "BIC"
    STRONG: bool = True

    impl = bic

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the upper-cased first eight characters of the normalized value.

        Yields None when the parent normalization rejects the input, or when
        the truncated code does not pass `is_valid`.
        """
        base = super().normalize(value)
        if base is None:
            return None
        # Only the leading eight characters are kept before re-validating.
        candidate = base[:8].upper()
        return candidate if cls.is_valid(candidate) else None

CPF

Bases: StdnumFormat

Cadastro de Pessoas Físicas, Brazilian national identifier

Source code in rigour/ids/stdnum_.py
class CPF(StdnumFormat):
    """Cadastro de Pessoas Físicas, Brazilian national identifier"""

    TITLE = "CPF"

    impl = cpf

    @classmethod
    def format(cls, value: str) -> str:
        """Render `value` through `cpf.format` and coerce the result to `str`."""
        rendered = cpf.format(value)
        return str(rendered)

FIGI

Bases: StdnumFormat

A FIGI number for a security, as managed by OpenFIGI.

Source code in rigour/ids/stdnum_.py
class FIGI(StdnumFormat):
    """A FIGI number for a security, as managed by OpenFIGI."""

    TITLE = "FIGI"
    STRONG: bool = True

    impl = figi

    @classmethod
    def format(cls, value: str) -> str:
        """Return `value` converted to upper case."""
        uppercased = value.upper()
        return uppercased

FormatSpec

Bases: TypedDict

An identifier format specification.

Source code in rigour/ids/__init__.py
class FormatSpec(TypedDict):
    """An identifier format specification."""

    # Display title; get_identifier_formats() fills this from the class's TITLE.
    title: str
    # Every FORMATS key that maps to the described format class.
    names: List[str]
    # The format class's docstring, or "" when it has none.
    description: str

IBAN

Bases: StdnumFormat

An IBAN number for a bank account.

Source code in rigour/ids/stdnum_.py
class IBAN(StdnumFormat):
    """An IBAN number for a bank account."""

    # Display title; reported as "title" by get_identifier_formats().
    TITLE = "IBAN"
    # Marks the format for inclusion in get_strong_format_names().
    STRONG: bool = True

    # Backend object for this format; presumably the python-stdnum `iban`
    # module that StdnumFormat delegates to — confirm against the imports.
    impl = iban

IMO

Bases: IdentifierFormat

An IMO number for a ship or shipping company

Source code in rigour/ids/imo.py
class IMO(IdentifierFormat):
    """An IMO number for a ship or shipping company"""

    TITLE = "IMO"
    STRONG: bool = True

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Determine if the given string is a valid IMO number.

        The digits captured by `IMO_RE` are accepted when their last digit
        matches either the vessel checksum or the company checksum.
        """
        match = IMO_RE.search(text)
        if match is None:
            return False
        digits = [int(d) for d in match.group(2)]
        check_digit = digits[-1]

        # Check if it's a vessel IMO number: weights run 7, 6, 5, ... over
        # every digit except the last one.
        vessel = sum(d * (7 - i) for i, d in enumerate(digits[:-1])) % 10
        if vessel == check_digit:
            return True

        # Check if it's a company IMO number: fixed weights over the first
        # six digits, then reduced modulo 11 and 10.
        weighted = sum(digits[i] * w for i, w in enumerate((8, 6, 4, 2, 9, 7)))
        company = (11 - (weighted % 11)) % 10
        return company == check_digit

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Normalize the given string to a valid IMO number.

        Returns the digits found by `IMO_RE` prefixed with "IMO", or None
        when nothing matches or the checksum test fails.
        """
        match = IMO_RE.search(text)
        if match is None:
            return None
        value = match.group(2)
        if cls.is_valid(value):
            return f"IMO{value}"
        return None

    @classmethod
    def format(cls, value: str) -> str:
        """Return `value` without spaces and with a single "IMO" prefix.

        An existing prefix is recognised regardless of case, so "imo 9074729"
        becomes "IMO9074729" rather than gaining a second prefix.
        """
        compact = value.replace(" ", "")
        if compact[:3].lower() == "imo":
            compact = compact[3:]
        return f"IMO{compact}"

is_valid(text) classmethod

Determine if the given string is a valid IMO number.

Source code in rigour/ids/imo.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Report whether `text` contains an IMO number with a correct check digit.

    The last captured digit must equal either the vessel checksum or the
    company checksum.
    """
    found = IMO_RE.search(text)
    if found is None:
        return False
    digits = [int(ch) for ch in found.group(2)]
    expected = digits[-1]

    # Vessel scheme: weights 7, 6, 5, ... over all digits but the last.
    vessel = sum(d * (7 - pos) for pos, d in enumerate(digits[:-1])) % 10
    if vessel == expected:
        return True

    # Company scheme: fixed weights over the first six digits.
    weighted = sum(digits[pos] * w for pos, w in enumerate((8, 6, 4, 2, 9, 7)))
    company = (11 - (weighted % 11)) % 10
    return company == expected

normalize(text) classmethod

Normalize the given string to a valid IMO number.

Source code in rigour/ids/imo.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Extract an IMO number from `text` and return it as `IMO<digits>`.

    Returns None when `IMO_RE` finds nothing or the checksum test fails.
    """
    found = IMO_RE.search(text)
    if found is None:
        return None
    number = found.group(2)
    return f"IMO{number}" if cls.is_valid(number) else None

INN

Bases: StdnumFormat

Russian tax identification number.

Source code in rigour/ids/stdnum_.py
class INN(StdnumFormat):
    """Russian tax identification number."""

    # Display title; reported as "title" by get_identifier_formats().
    TITLE = "INN"

    # Backend object for this format; presumably the python-stdnum `inn`
    # module — confirm against the imports.
    impl = inn

    @classmethod
    def format(cls, value: str) -> str:
        """Return `value` unchanged; no display formatting is applied."""
        return value

ISIN

Bases: StdnumFormat

An ISIN number for a security.

Source code in rigour/ids/stdnum_.py
class ISIN(StdnumFormat):
    """An ISIN number for a security."""

    TITLE = "ISIN"
    STRONG: bool = True

    impl = isin

    @classmethod
    def format(cls, value: str) -> str:
        """Return `value` converted to upper case."""
        uppercased = value.upper()
        return uppercased

IdentifierFormat

Bases: object

Base class for identifier types.

Source code in rigour/ids/common.py
class IdentifierFormat:
    """Base class for identifier types."""

    TITLE: str = "Generic identifier"
    STRONG: bool = False

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether `value` normalizes to a non-empty string."""
        cleaned = cls.normalize(value)
        if cleaned is None:
            return False
        return len(cleaned) > 0

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return `value` with leading and trailing whitespace removed."""
        stripped = value.strip()
        return stripped

    @classmethod
    def format(cls, value: str) -> str:
        """Return `value` unchanged; subclasses may override for display."""
        return value

LEI

Bases: StdnumFormat

Legal Entity Identifier (ISO 17442)

Source code in rigour/ids/stdnum_.py
class LEI(StdnumFormat):
    """Legal Entity Identifier (ISO 17442)"""

    TITLE = "LEI"
    STRONG: bool = True

    impl = lei

    @classmethod
    def format(cls, value: str) -> str:
        """Return `value` converted to upper case."""
        uppercased = value.upper()
        return uppercased

NPI

Bases: IdentifierFormat

National Provider Identifier.

Source code in rigour/ids/npi.py
class NPI(IdentifierFormat):
    """National Provider Identifier."""

    TITLE: str = "NPI"
    STRONG: bool = True

    # cf. https://www.johndcook.com/blog/2024/06/26/npi-number/

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Report whether `text` is an acceptable NPI.

        The value must match `NPI_RE`, must not be listed in `INVALID`, and
        must pass the Luhn check.
        """
        if NPI_RE.match(text) is None or text in INVALID:
            return False
        # Ten-character values get the "80840" prefix before the Luhn check.
        payload = "80840" + text if len(text) == 10 else text
        return bool(luhn.is_valid(payload))

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Extract an NPI from `text`, or return None when none is valid."""
        found = NPI_RE.search(text)
        if found is None:
            return None
        candidate = found.group(1)
        if not cls.is_valid(candidate) or candidate in INVALID:
            return None
        return candidate

is_valid(text) classmethod

Determine if the given string is a valid NPI.

Source code in rigour/ids/npi.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Report whether `text` is an acceptable NPI.

    The value must match `NPI_RE`, must not be listed in `INVALID`, and must
    pass the Luhn check.
    """
    if NPI_RE.match(text) is None or text in INVALID:
        return False
    # Ten-character values get the "80840" prefix before the Luhn check.
    payload = "80840" + text if len(text) == 10 else text
    return bool(luhn.is_valid(payload))

normalize(text) classmethod

Normalize the given string to a valid NPI.

Source code in rigour/ids/npi.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Extract an NPI from `text`, or return None when none is valid."""
    found = NPI_RE.search(text)
    if found is None:
        return None
    candidate = found.group(1)
    if not cls.is_valid(candidate) or candidate in INVALID:
        return None
    return candidate

OGRN

Bases: IdentifierFormat

Primary State Registration Number (Russian company registration).

Source code in rigour/ids/ogrn.py
class OGRN(IdentifierFormat):
    """Primary State Registration Number (Russian company registration)."""

    TITLE: str = "OGRN"
    STRONG: bool = True

    # cf. https://docs.trellix.com/de-DE/bundle/data-loss-prevention-11.10.x-classification-definitions-reference-guide/page/GUID-945B4343-861E-4A57-8E60-8B6028871BA1.html

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Determine if the given string is a valid OGRN.

        The text must match `OGRN_RE`, must not start with "0", and its last
        digit must equal the control digit computed from the preceding digits.
        """
        if OGRN_RE.match(text) is None:
            return False

        # Validate registration type
        if text[0] == "0":
            return False

        # Validate control digit logic
        control_digit = int(text[-1])
        return control_digit == cls.calculate_control_digit(text)

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Normalize the given string to a valid OGRN.

        Returns the first group captured by `OGRN_RE` when it validates,
        otherwise None.
        """
        match = OGRN_RE.search(text)
        if match is None:
            return None
        value = match.group(1)
        if cls.is_valid(value):
            return value
        return None

    @classmethod
    def calculate_control_digit(cls, grn: str) -> Optional[int]:
        """Compute the expected control digit for a 13- or 15-character number.

        The control digit is the last decimal digit of the remainder: modulo
        11 for 13-character numbers and modulo 13 for 15-character numbers.
        Returns None for any other length.
        """
        if len(grn) == 13:
            return int(grn[:12]) % 11 % 10
        if len(grn) == 15:
            # The remainder modulo 13 can be 10, 11 or 12; all three must be
            # reduced to a single digit, not only 10.
            return int(grn[:14]) % 13 % 10
        return None

is_valid(text) classmethod

Determine if the given string is a valid OGRN.

Source code in rigour/ids/ogrn.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Report whether `text` is a well-formed OGRN with a matching control digit."""
    # Reject anything that fails the pattern or has "0" as registration type.
    if OGRN_RE.match(text) is None or text[0] == "0":
        return False

    # The final character must equal the computed control digit.
    expected = cls.calculate_control_digit(text)
    return int(text[-1]) == expected

normalize(text) classmethod

Normalize the given string to a valid OGRN.

Source code in rigour/ids/ogrn.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Extract an OGRN from `text`, or return None when none is valid."""
    found = OGRN_RE.search(text)
    if found is None:
        return None
    candidate = found.group(1)
    return candidate if cls.is_valid(candidate) else None

SSN

Bases: StdnumFormat

US Social Security Number

Source code in rigour/ids/stdnum_.py
class SSN(StdnumFormat):
    """US Social Security Number"""

    TITLE = "SSN"
    STRONG: bool = False

    impl = ssn

    @classmethod
    def format(cls, value: str) -> str:
        """Render `value` through `ssn.format` and coerce the result to `str`."""
        rendered = ssn.format(value)
        return str(rendered)

StrictFormat

Bases: IdentifierFormat

A generic identifier type that applies harsh normalization.

Source code in rigour/ids/strict.py
class StrictFormat(IdentifierFormat):
    """A generic identifier type that applies harsh normalization."""

    TITLE: str = "Strict identifier"

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Report whether `value` normalizes to more than two characters."""
        cleaned = cls.normalize(value)
        if cleaned is None:
            return False
        return len(cleaned) > 2

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Reduce `value` to its upper-cased alphanumeric characters.

        Returns None when `ascii_text` yields None or fewer than two
        characters; the length test runs before non-alphanumerics are dropped.
        """
        text = ascii_text(value)
        if text is None or len(text) < 2:
            return None
        kept = "".join(ch for ch in text if ch.isalnum())
        return kept.upper()

UEI

Bases: IdentifierFormat

US GSA Unique Entity ID.

Source code in rigour/ids/uei.py
class UEI(IdentifierFormat):
    """US GSA Unique Entity ID."""

    # https://www.gsa.gov/about-us/organization/federal-acquisition-service/integrated-award-environment-iae/iae-systems-information-kit/uei-technical-specifications-and-api-information

    # Was "NPI" (copy-paste from the NPI format), which mislabelled this
    # format in get_identifier_formats().
    TITLE: str = "UEI"
    STRONG: bool = False
    """Marked false because the SAM database is massively duplicated, and entities in
    SAM conflate companies and their owners. This makes UEIs more like cluster IDs than
    unique entity identifiers."""

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Determine if the given string is a valid UEI.

        The text must match `UEI_RE` and must not start with "0". No checksum
        is verified.
        """
        if UEI_RE.match(text) is None:
            return False

        if text.startswith("0"):
            return False

        # TODO: Figure out checksum scheme
        return True

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Normalize the given string to a valid UEI.

        Returns the first group captured by `UEI_RE` in upper case, or None
        when nothing matches or the match fails `is_valid`.
        """
        match = UEI_RE.search(text)
        if match is None:
            return None
        value = match.group(1)
        if not cls.is_valid(value):
            return None
        return value.upper()

    @classmethod
    def format(cls, value: str) -> str:
        """Return `value` converted to upper case."""
        return value.upper()

STRONG = False class-attribute instance-attribute

Marked false because the SAM database is massively duplicated, and entities in SAM conflate companies and their owners. This makes UEIs more like cluster IDs than unique entity identifiers.

is_valid(text) classmethod

Determine if the given string is a valid UEI.

Source code in rigour/ids/uei.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Report whether `text` matches `UEI_RE` and does not start with "0"."""
    if UEI_RE.match(text) is None or text.startswith("0"):
        return False

    # TODO: Figure out checksum scheme
    return True

normalize(text) classmethod

Normalize the given string to a valid UEI.

Source code in rigour/ids/uei.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Extract a UEI from `text` and return it upper-cased, or None if invalid."""
    found = UEI_RE.search(text)
    if found is None:
        return None
    candidate = found.group(1)
    return candidate.upper() if cls.is_valid(candidate) else None

WikidataQID

Bases: IdentifierFormat

A wikidata item identifier.

Source code in rigour/ids/wikidata.py
class WikidataQID(IdentifierFormat):
    """A wikidata item identifier."""

    TITLE: str = "Wikidata QID"
    STRONG: bool = True

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Report whether `is_qid` accepts `text`."""
        accepted = is_qid(text)
        return accepted

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Reduce `text` to an upper-cased QID, or None when it is not one.

        Only the part after the last "/" is considered, so an identifier at
        the end of a slash-separated path or URL is accepted.
        """
        tail = text.rsplit("/", 1)[-1]
        candidate = tail.strip().upper()
        return candidate if QID.match(candidate) is not None else None

is_valid(text) classmethod

Determine if the given string is a valid wikidata QID.

Source code in rigour/ids/wikidata.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Report whether `is_qid` accepts `text`."""
    accepted = is_qid(text)
    return accepted

normalize(text) classmethod

Normalize the given string to a valid wikidata QID.

Source code in rigour/ids/wikidata.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Reduce `text` to an upper-cased QID, or None when it is not one.

    Only the part after the last "/" is considered, so an identifier at the
    end of a slash-separated path or URL is accepted.
    """
    tail = text.rsplit("/", 1)[-1]
    candidate = tail.strip().upper()
    return candidate if QID.match(candidate) is not None else None

get_identifier_format(name)

Get the identifier type class for the given format name.

Source code in rigour/ids/__init__.py
def get_identifier_format(name: str) -> Type[IdentifierFormat]:
    """Look up the identifier format class registered under `name`.

    Unknown names propagate whatever error the `FORMATS[name]` lookup raises.
    """
    format_cls = FORMATS[name]
    return format_cls

get_identifier_format_names()

Get a list of all identifier type names.

Source code in rigour/ids/__init__.py
def get_identifier_format_names() -> List[str]:
    """Return every name registered in `FORMATS` as a new list."""
    return [*FORMATS.keys()]

get_identifier_formats()

Get a list of all identifier formats.

Source code in rigour/ids/__init__.py
def get_identifier_formats() -> List[FormatSpec]:
    """Get a list of all identifier formats.

    Each distinct class in `FORMATS` yields one spec carrying all of its
    registered names, its `TITLE` and its docstring. Specs are sorted by
    title; classes with equal titles keep their `FORMATS` order.
    """
    formats: List[FormatSpec] = []
    # dict.fromkeys de-duplicates while keeping the order of FORMATS. Iterating
    # a set of classes instead would order equal-title entries differently
    # from run to run, because the sort below is stable.
    for type_ in dict.fromkeys(FORMATS.values()):
        names = [name for name, cls in FORMATS.items() if cls == type_]
        fmt: FormatSpec = {
            "names": names,
            "title": type_.TITLE,
            "description": type_.__doc__ or "",
        }
        formats.append(fmt)
    return sorted(formats, key=lambda f: f["title"])

get_strong_format_names() cached

Get a list of all strong identifier type names.

Source code in rigour/ids/__init__.py
@cache
def get_strong_format_names() -> List[str]:
    """Return the `FORMATS` names whose format class has a truthy `STRONG`.

    NOTE(review): if `cache` is `functools.cache`, every call returns the same
    list object, so callers should treat it as read-only — confirm.
    """
    strong: List[str] = []
    for name, format_cls in FORMATS.items():
        if format_cls.STRONG:
            strong.append(name)
    return strong