Skip to content

Identifiers

rigour.ids

BIC

Bases: StdnumFormat

BIC (ISO 9362 Business identifier codes).

Source code in rigour/ids/stdnum_.py
class BIC(StdnumFormat):
    """BIC (ISO 9362 Business identifier codes)."""

    TITLE = "BIC"
    STRONG: bool = True

    impl = bic

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return the 8-character form of ``value``, or ``None`` if unusable.

        The parent normalization runs first; its result is cut down to the
        first eight characters, and that shortened code is returned only when
        it passes ``is_valid`` on its own.
        """
        full_code = super().normalize(value)
        if full_code is None:
            return None
        # Only the leading eight characters are kept; anything after is dropped.
        short_code = full_code[:8]
        return short_code if cls.is_valid(short_code) else None

FIGI

Bases: StdnumFormat

A FIGI number for a security, as managed by OpenFIGI.

Source code in rigour/ids/stdnum_.py
class FIGI(StdnumFormat):
    """A FIGI number for a security, as managed by OpenFIGI."""

    TITLE = "FIGI"
    STRONG: bool = True

    impl = figi

    @classmethod
    def format(cls, value: str) -> str:
        """Return ``value`` converted to upper case for display."""
        uppercased = value.upper()
        return uppercased

FormatSpec

Bases: TypedDict

An identifier format specification.

Source code in rigour/ids/__init__.py
class FormatSpec(TypedDict):
    """An identifier format specification."""

    # Display title of the identifier format.
    title: str
    # Every name under which the format can be looked up.
    names: List[str]
    # Free-text description of the format.
    description: str

IBAN

Bases: StdnumFormat

An IBAN number for a bank account.

Source code in rigour/ids/stdnum_.py
class IBAN(StdnumFormat):
    """An IBAN number for a bank account."""

    # Display title of this format.
    TITLE = "IBAN"
    # Flags this format as a strong identifier.
    STRONG: bool = True

    # Backing implementation object; presumably consumed by StdnumFormat for
    # validation/normalization — confirm against the base class.
    impl = iban

IMO

Bases: StdnumFormat

An IMO number for a ship.

Source code in rigour/ids/stdnum_.py
class IMO(StdnumFormat):
    """An IMO number for a ship."""

    # Display title of this format.
    TITLE = "IMO"
    # Flags this format as a strong identifier.
    STRONG: bool = True

    # Backing implementation object; presumably consumed by StdnumFormat for
    # validation/normalization — confirm against the base class.
    impl = imo

INN

Bases: StdnumFormat

Russian tax identification number.

Source code in rigour/ids/stdnum_.py
class INN(StdnumFormat):
    """Russian tax identification number."""

    # Display title of this format. STRONG is not set here, so whatever the
    # base class defines applies.
    TITLE = "INN"

    # Backing implementation object; presumably consumed by StdnumFormat for
    # validation/normalization — confirm against the base class.
    impl = inn

    @classmethod
    def format(cls, value: str) -> str:
        """Return ``value`` unchanged; no display formatting is applied."""
        return value

ISIN

Bases: StdnumFormat

An ISIN number for a security.

Source code in rigour/ids/stdnum_.py
class ISIN(StdnumFormat):
    """An ISIN number for a security."""

    TITLE = "ISIN"
    STRONG: bool = True

    impl = isin

    @classmethod
    def format(cls, value: str) -> str:
        """Return ``value`` converted to upper case for display."""
        uppercased = value.upper()
        return uppercased

IdentifierFormat

Bases: object

Base class for identifier types.

Source code in rigour/ids/common.py
class IdentifierFormat(object):
    """Base class for identifier types."""

    TITLE: str = "Generic identifier"
    STRONG: bool = False

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Return ``True`` when ``value`` normalizes to a non-empty result."""
        cleaned = cls.normalize(value)
        if cleaned is None:
            return False
        return len(cleaned) > 0

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Return ``value`` with leading and trailing whitespace removed."""
        stripped = value.strip()
        return stripped

    @classmethod
    def format(cls, value: str) -> str:
        """Return ``value`` as-is; the generic format has no display form."""
        return value

LEI

Bases: StdnumFormat

Legal Entity Identifier (ISO 17442).

Source code in rigour/ids/stdnum_.py
class LEI(StdnumFormat):
    """Legal Entity Identifier (ISO 17442)"""

    TITLE = "LEI"
    STRONG: bool = True

    impl = lei

    @classmethod
    def format(cls, value: str) -> str:
        """Return ``value`` converted to upper case for display."""
        uppercased = value.upper()
        return uppercased

NPI

Bases: IdentifierFormat

National Provider Identifier.

Source code in rigour/ids/npi.py
class NPI(IdentifierFormat):
    """National Provider Identifier."""

    TITLE: str = "NPI"
    STRONG: bool = True

    # cf. https://www.johndcook.com/blog/2024/06/26/npi-number/

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Check ``text`` against the NPI pattern, block list and Luhn checksum."""
        # Reject anything that fails the pattern or is explicitly block-listed.
        if NPI_RE.match(text) is None or text in INVALID:
            return False

        # Ten-character values get the "80840" prefix before the Luhn check.
        candidate = "80840" + text if len(text) == 10 else text
        return bool(luhn.is_valid(candidate))

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Extract the first NPI-shaped token from ``text`` if it validates.

        Returns ``None`` when no token is found or the token is not a valid,
        non-block-listed NPI.
        """
        found = NPI_RE.search(text)
        if found is None:
            return None
        candidate = found.group(1)
        if not cls.is_valid(candidate) or candidate in INVALID:
            return None
        return candidate

is_valid(text) classmethod

Determine if the given string is a valid NPI.

Source code in rigour/ids/npi.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Check ``text`` against the NPI pattern, block list and Luhn checksum."""
    # Reject anything that fails the pattern or is explicitly block-listed.
    if NPI_RE.match(text) is None or text in INVALID:
        return False

    # Ten-character values get the "80840" prefix before the Luhn check.
    candidate = "80840" + text if len(text) == 10 else text
    return bool(luhn.is_valid(candidate))

normalize(text) classmethod

Normalize the given string to a valid NPI.

Source code in rigour/ids/npi.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Extract the first NPI-shaped token from ``text`` if it validates.

    Returns ``None`` when no token is found or the token is not a valid,
    non-block-listed NPI.
    """
    found = NPI_RE.search(text)
    if found is None:
        return None
    candidate = found.group(1)
    if not cls.is_valid(candidate) or candidate in INVALID:
        return None
    return candidate

OGRN

Bases: IdentifierFormat

Primary State Registration Number (Russian company registration).

Source code in rigour/ids/ogrn.py
class OGRN(IdentifierFormat):
    """Primary State Registration Number (Russian company registration)."""

    TITLE: str = "OGRN"
    STRONG: bool = True

    # cf. https://docs.trellix.com/de-DE/bundle/data-loss-prevention-11.10.x-classification-definitions-reference-guide/page/GUID-945B4343-861E-4A57-8E60-8B6028871BA1.html

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Determine if the given string is a valid OGRN.

        The value must match ``OGRN_RE``, must not start with ``"0"``, must
        carry a known federal subject code at positions 3-4, and its last
        digit must equal the computed control digit.
        """
        if OGRN_RE.match(text) is None:
            return False

        # Validate registration type
        if text[0] == "0":
            return False

        # Validate federal subject code
        federal_subject_code = int(text[3:5])
        if federal_subject_code not in VALID_FEDERAL_SUBJECT_CODES:
            return False

        # Validate control digit logic. An unsupported length yields ``None``
        # from the helper, which never equals an int, so the result is False.
        control_digit = int(text[-1])
        return control_digit == cls.calculate_control_digit(text)

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Normalize the given string to a valid OGRN.

        Returns the first ``OGRN_RE`` match found in ``text`` when it passes
        ``is_valid``; otherwise ``None``.
        """
        match = OGRN_RE.search(text)
        if match is None:
            return None
        value = match.group(1)
        if cls.is_valid(value):
            return value
        return None

    @classmethod
    def calculate_control_digit(cls, grn: str) -> Optional[int]:
        """Compute the expected control digit for a 13- or 15-character number.

        For 13 characters the first 12 digits are taken modulo 11; for 15
        characters the first 14 digits are taken modulo 13. In both cases the
        control digit is the last decimal digit of that remainder, so a
        remainder of 10, 11 or 12 maps to 0, 1 or 2 respectively.

        Returns ``None`` for any other length. Raises ``ValueError`` if the
        leading characters are not numeric.
        """
        if len(grn) == 13:
            return int(grn[:12]) % 11 % 10
        if len(grn) == 15:
            # The remainder can be 11 or 12 here; only its last digit can
            # ever match a single control digit, hence the final ``% 10``.
            return int(grn[:14]) % 13 % 10
        return None

is_valid(text) classmethod

Determine if the given string is a valid OGRN.

Source code in rigour/ids/ogrn.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Check whether ``text`` is a structurally valid OGRN.

    The value must match ``OGRN_RE``, must not start with ``"0"``, must carry
    a known federal subject code at positions 3-4, and its last digit must
    equal the computed control digit.
    """
    # Pattern check first, then the registration-type digit.
    if OGRN_RE.match(text) is None or text[0] == "0":
        return False

    # Federal subject code occupies the fourth and fifth characters.
    subject_code = int(text[3:5])
    if subject_code not in VALID_FEDERAL_SUBJECT_CODES:
        return False

    # Compare the trailing digit with the computed control digit.
    trailing_digit = int(text[-1])
    expected_digit = cls.calculate_control_digit(text)
    return trailing_digit == expected_digit

normalize(text) classmethod

Normalize the given string to a valid OGRN.

Source code in rigour/ids/ogrn.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Return the first OGRN-shaped token in ``text`` if it validates.

    Returns ``None`` when ``OGRN_RE`` finds nothing or the token fails
    ``is_valid``.
    """
    found = OGRN_RE.search(text)
    if found is None:
        return None
    candidate = found.group(1)
    return candidate if cls.is_valid(candidate) else None

StrictFormat

Bases: IdentifierFormat

A generic identifier type that applies harsh normalization.

Source code in rigour/ids/strict.py
class StrictFormat(IdentifierFormat):
    """A generic identifier type that applies harsh normalization."""

    TITLE: str = "Strict identifier"

    @classmethod
    def is_valid(cls, value: str) -> bool:
        """Return ``True`` when ``value`` normalizes to more than two characters."""
        cleaned = cls.normalize(value)
        if cleaned is None:
            return False
        return len(cleaned) > 2

    @classmethod
    def normalize(cls, value: str) -> Optional[str]:
        """Reduce ``value`` to upper-cased alphanumeric characters.

        The input is first passed through ``ascii_text``; a missing result or
        one shorter than two characters yields ``None``. Otherwise every
        non-alphanumeric character is dropped and the rest is upper-cased.
        """
        text = ascii_text(value)
        if text is None or len(text) < 2:
            return None
        kept = "".join(ch for ch in text if ch.isalnum())
        return kept.upper()

WikidataQID

Bases: IdentifierFormat

A Wikidata item identifier.

Source code in rigour/ids/wikidata.py
class WikidataQID(IdentifierFormat):
    """A wikidata item identifier."""

    TITLE: str = "Wikidata QID"
    STRONG: bool = True

    @classmethod
    def is_valid(cls, text: str) -> bool:
        """Report whether ``text`` is accepted by ``is_qid``."""
        verdict = is_qid(text)
        return verdict

    @classmethod
    def normalize(cls, text: str) -> Optional[str]:
        """Extract an upper-cased QID from ``text``, or return ``None``.

        Only the part after the last ``/`` is considered, so a URL-like value
        is reduced to its final path segment before matching against ``QID``.
        """
        last_segment = text.rsplit("/", 1)[-1]
        candidate = last_segment.strip().upper()
        if QID.match(candidate) is None:
            return None
        return candidate

is_valid(text) classmethod

Determine if the given string is a valid wikidata QID.

Source code in rigour/ids/wikidata.py
@classmethod
def is_valid(cls, text: str) -> bool:
    """Report whether ``text`` is accepted by ``is_qid``."""
    verdict = is_qid(text)
    return verdict

normalize(text) classmethod

Normalize the given string to a valid wikidata QID.

Source code in rigour/ids/wikidata.py
@classmethod
def normalize(cls, text: str) -> Optional[str]:
    """Extract an upper-cased QID from ``text``, or return ``None``.

    Only the part after the last ``/`` is considered, so a URL-like value is
    reduced to its final path segment before matching against ``QID``.
    """
    last_segment = text.rsplit("/", 1)[-1]
    candidate = last_segment.strip().upper()
    if QID.match(candidate) is None:
        return None
    return candidate

get_identifier_format(name)

Get the identifier type class for the given format name.

Source code in rigour/ids/__init__.py
def get_identifier_format(name: str) -> Type[IdentifierFormat]:
    """Look up the identifier format class registered under ``name``.

    Raises ``KeyError`` if ``name`` is not present in ``FORMATS``.
    """
    format_cls = FORMATS[name]
    return format_cls

get_identifier_format_names()

Get a list of all identifier type names.

Source code in rigour/ids/__init__.py
def get_identifier_format_names() -> List[str]:
    """Return every name registered in ``FORMATS`` as a new list."""
    registered = FORMATS.keys()
    return list(registered)

get_identifier_formats()

Get a list of all identifier formats.

Source code in rigour/ids/__init__.py
def get_identifier_formats() -> List[FormatSpec]:
    """Get a list of all identifier formats.

    Each distinct format class in ``FORMATS`` yields one spec holding every
    name it is registered under (in registration order), its ``TITLE`` and
    its class docstring as the description. The result is sorted by title.
    """
    # Group names by class in a single pass. Grouping in a dict (rather than
    # iterating a set of classes) keeps entries in registration order, so
    # specs that share a title come out in a stable order after sorting.
    names_by_type: "dict[Type[IdentifierFormat], List[str]]" = {}
    for name, type_ in FORMATS.items():
        names_by_type.setdefault(type_, []).append(name)

    formats: List[FormatSpec] = []
    for type_, names in names_by_type.items():
        fmt: FormatSpec = {
            "names": names,
            "title": type_.TITLE,
            "description": type_.__doc__ or "",
        }
        formats.append(fmt)
    return sorted(formats, key=lambda f: f["title"])

get_strong_format_names() cached

Get a list of all strong identifier type names.

Source code in rigour/ids/__init__.py
@cache
def get_strong_format_names() -> List[str]:
    """Return the names in ``FORMATS`` whose class has a truthy ``STRONG``.

    NOTE: the result is cached, so every caller receives the same list
    object; treat it as read-only.
    """
    strong_names: List[str] = []
    for name, format_cls in FORMATS.items():
        if format_cls.STRONG:
            strong_names.append(name)
    return strong_names