Source code for PyFunceble.query.whois.converter.expiration_date
"""
The tool to check the availability or syntax of domain, IP or URL.
::
██████╗ ██╗ ██╗███████╗██╗ ██╗███╗ ██╗ ██████╗███████╗██████╗ ██╗ ███████╗
██╔══██╗╚██╗ ██╔╝██╔════╝██║ ██║████╗ ██║██╔════╝██╔════╝██╔══██╗██║ ██╔════╝
██████╔╝ ╚████╔╝ █████╗ ██║ ██║██╔██╗ ██║██║ █████╗ ██████╔╝██║ █████╗
██╔═══╝ ╚██╔╝ ██╔══╝ ██║ ██║██║╚██╗██║██║ ██╔══╝ ██╔══██╗██║ ██╔══╝
██║ ██║ ██║ ╚██████╔╝██║ ╚████║╚██████╗███████╗██████╔╝███████╗███████╗
╚═╝ ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═══╝ ╚═════╝╚══════╝╚═════╝ ╚══════╝╚══════╝
Provides our expiration date extracter.
Author:
Nissar Chababy, @funilrys, contactTATAfunilrysTODTODcom
Special thanks:
https://pyfunceble.github.io/#/special-thanks
Contributors:
https://pyfunceble.github.io/#/contributors
Project link:
https://github.com/funilrys/PyFunceble
Project documentation:
https://pyfunceble.readthedocs.io/en/latest/
Project homepage:
https://pyfunceble.github.io/
License:
::
Copyright 2017, 2018, 2019, 2020, 2021 Nissar Chababy
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from typing import List, Optional, Tuple
from PyFunceble.helpers.regex import RegexHelper
from PyFunceble.query.whois.converter.base import ConverterBase
from PyFunceble.query.whois.converter.digit2digits import Digit2Digits
from PyFunceble.query.whois.converter.month2unified import Month2Unified
[docs]class ExpirationDateExtractor(ConverterBase):
"""
Provides an interface for the extraction of the expiration date.
"""
PATTERNS: List[str] = [
r"expire:(.*)",
r"expire on:(.*)",
r"Expiry Date:(.*)",
r"free-date(.*)",
r"expires:(.*)",
r"Expiration date:(.*)",
r"Expiry date:(.*)",
r"Expire Date:(.*)",
r"renewal date:(.*)",
r"Expires:(.*)",
r"validity:(.*)",
r"Expiration Date :(.*)",
r"Expiry :(.*)",
r"expires at:(.*)",
r"domain_datebilleduntil:(.*)",
r"Data de expiração \/ Expiration Date \(dd\/mm\/yyyy\):(.*)",
r"Fecha de expiración \(Expiration date\):(.*)",
r"\[Expires on\](.*)",
r"Record expires on(.*)(\(YYYY-MM-DD\))",
r"status: OK-UNTIL(.*)",
r"renewal:(.*)",
r"expires............:(.*)",
r"expire-date:(.*)",
r"Exp date:(.*)",
r"Valid-date(.*)",
r"Expires On:(.*)",
r"Fecha de vencimiento:(.*)",
r"Expiration:.........(.*)",
r"Fecha de Vencimiento:(.*)",
r"Registry Expiry Date:(.*)",
r"Expires on..............:(.*)",
r"Expiration Time:(.*)",
r"Expiration Date:(.*)",
r"Expired:(.*)",
r"Date d'expiration:(.*)",
r"expiration date:(.*)",
]
"""
Provides all our known patterns.
"""
REGEX_DIGITS: str = r"[0-9]"
"""
Provides the regex to match in order to extract the digits.
"""
MARKER2DATE_REGEX: dict = {
# Date in format: 02-jan-2017
"1": r"([0-9]{2})-([a-z]{3})-([0-9]{4})",
# Date in format: 02.01.2017 // Month: jan
"2": r"([0-9]{2})\.([0-9]{2})\.([0-9]{4})$",
# Date in format: 02/01/2017 // Month: jan
"3": r"([0-3][0-9])\/(0[1-9]|1[012])\/([0-9]{4})",
# Date in format: 2017-01-02 // Month: jan
"4": r"([0-9]{4})-([0-9]{2})-([0-9]{2})$",
# Date in format: 2017.01.02 // Month: jan
"5": r"([0-9]{4})\.([0-9]{2})\.([0-9]{2})$",
# Date in format: 2017/01/02 // Month: jan
"6": r"([0-9]{4})\/([0-9]{2})\/([0-9]{2})$",
# Date in format: 2017.01.02 15:00:00
"7": r"([0-9]{4})\.([0-9]{2})\.([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
# Date in format: 20170102 15:00:00 // Month: jan
"8": r"([0-9]{4})([0-9]{2})([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
# Date in format: 2017-01-02 15:00:00 // Month: jan
"9": r"([0-9]{4})-([0-9]{2})-([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
# Date in format: 02.01.2017 15:00:00 // Month: jan
"10": r"([0-9]{2})\.([0-9]{2})\.([0-9]{4})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
# Date in format: 02-Jan-2017 15:00:00 UTC
"11": r"([0-9]{2})-([A-Z]{1}[a-z]{2})-([0-9]{4})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]{1}.*", # pylint: disable=line-too-long # noqa: E501
# Date in format: 2017/01/02 01:00:00 (+0900) // Month: jan
"12": r"([0-9]{4})\/([0-9]{2})\/([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s\(.*\)",
# Date in format: 2017/01/02 01:00:00 // Month: jan
"13": r"([0-9]{4})\/([0-9]{2})\/([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}$",
# Date in format: Mon Jan 02 15:00:00 GMT 2017
"14": r"[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]{3}\s([0-9]{4})", # pylint: disable=line-too-long # noqa: E501
# Date in format: Mon Jan 02 2017
"15": r"[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{2})\s([0-9]{4})",
# Date in format: 2017-01-02T15:00:00 // Month: jan
"16": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}$",
# Date in format: 2017-01-02T15:00:00Z // Month: jan${'7}
"17": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}[A-Z].*",
# Date in format: 2017-01-02T15:00:00+0200 // Month: jan
"18": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{4}", # pylint: disable=line-too-long # noqa: E501
# Date in format: 2017-01-02T15:00:00+0200.622265+03:00 //
# Month: jan
"19": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9].*[+-][0-9]{2}:[0-9]{2}", # pylint: disable=line-too-long # noqa: E501
# Date in format: 2017-01-02T15:00:00+0200.622265 // Month: jan
"20": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}$",
# Date in format: 2017-01-02T23:59:59.0Z // Month: jan
"21": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9].*[A-Z]", # pylint: disable=line-too-long # noqa: E501
# Date in format: 02-01-2017 // Month: jan
"22": r"([0-9]{2})-([0-9]{2})-([0-9]{4})",
# Date in format: 2017. 01. 02. // Month: jan
"23": r"([0-9]{4})\.\s([0-9]{2})\.\s([0-9]{2})\.",
# Date in format: 2017-01-02T00:00:00+13:00 // Month: jan
"24": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{2}:[0-9]{2}", # pylint: disable=line-too-long # noqa: E501
# Date in format: 20170102 // Month: jan
"25": r"(?=[0-9]{8})(?=([0-9]{4})([0-9]{2})([0-9]{2}))",
# Date in format: 02-Jan-2017
"26": r"([0-9]{2})-([A-Z]{1}[a-z]{2})-([0-9]{4})$",
# Date in format: 02.1.2017 // Month: jan
"27": r"([0-9]{2})\.([0-9]{1})\.([0-9]{4})",
# Date in format: 02 Jan 2017
"28": r"([0-9]{1,2})\s([A-Z]{1}[a-z]{2})\s([0-9]{4})",
# Date in format: 02-January-2017
"29": r"([0-9]{2})-([A-Z]{1}[a-z]*)-([0-9]{4})",
# Date in format: 2017-Jan-02.
"30": r"([0-9]{4})-([A-Z]{1}[a-z]{2})-([0-9]{2})\.",
# Date in format: Mon Jan 02 15:00:00 2017
"31": r"[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{1,2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s([0-9]{4})", # pylint: disable=line-too-long # noqa: E501
# Date in format: Mon Jan 2017 15:00:00
"32": r"()[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{4})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
# Date in format: January 02 2017-Jan-02
"33": r"([A-Z]{1}[a-z]*)\s([0-9]{1,2})\s([0-9]{4})",
# Date in format: 2.1.2017 // Month: jan
"34": r"([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{4})",
# Date in format: 20170102000000 // Month: jan
"35": r"([0-9]{4})([0-9]{2})([0-9]{2})[0-9]+",
# Date in format: 01/02/2017 // Month: jan
"36": r"(0[1-9]|1[012])\/([0-3][0-9])\/([0-9]{4})",
# Date in format: January 2 2017
"37": r"([A-Z]{1}[a-z].*)\s\s([0-9]{1,2})\s([0-9]{4})",
# Date in format: 2nd January 2017
"38": r"([0-9]{1,})[a-z]{1,}\s([A-Z].*)\s(2[0-9]{3})",
}
"""
Provides all the known regex to extract the date.
.. note::
The index (or key) will be used to map or group some of them later.
"""
REGEX_PARSE_MAP: List[dict] = [
{
"regex_keys": [1, 2, 3, 10, 11, 22, 26, 27, 28, 29, 32, 34, 38],
"positions": {"day": 0, "month": 1, "year": 2},
},
{
"regex_keys": [14, 15, 31, 33, 36, 37],
"positions": {"day": 1, "month": 0, "year": 2},
},
{
"regex_keys": [
4,
5,
6,
7,
8,
9,
12,
13,
16,
17,
18,
19,
20,
21,
23,
24,
25,
30,
35,
],
"positions": {"day": 2, "month": 1, "year": 0},
},
]
"""
Our parsing map. Indeed, we hava a list of regex, but no way to know
how to parse them. Especially when the order (month, day, year) are
different from a format to another.
This variable solve that problem by interpreting all regex we previously
created.
"""
@ConverterBase.data_to_convert.setter
def data_to_convert(self, value: str) -> None:
"""
Sets the data to convert or work with.
:param value:
The record to work with.
:raise TypeError:
When the given :code:`value` is not a :py:class:`str`.
:raise ValueError:
When the given :code:`value` is empty.
"""
if not isinstance(value, str):
raise TypeError(f"<value> should be {str}, {type(value)} is given.")
if not value:
raise ValueError("<value> should not be empty.")
# pylint: disable=no-member
super(ExpirationDateExtractor, self.__class__).data_to_convert.fset(self, value)
def __get_line(self) -> Optional[str]:
"""
Tries to get the expiration date line from the given record.
"""
for regex in self.PATTERNS:
expiration_date_line = RegexHelper(regex).match(
self.data_to_convert, return_match=True, rematch=True, group=0
)
if not expiration_date_line:
continue
return expiration_date_line
return None
def __get_actual_expiration_date(
self, extracted: str
) -> Optional[Tuple[str, str, str]]:
"""
Tries to extract the actual expiration date.
"""
for index, date_regex in self.MARKER2DATE_REGEX.items():
matched = RegexHelper(date_regex).match(
extracted, return_match=True, rematch=True
)
date_parts = tuple()
if not matched:
continue
for parse_case in self.REGEX_PARSE_MAP:
if int(index) not in parse_case["regex_keys"]:
continue
date_parts = (
Digit2Digits(
matched[parse_case["positions"]["day"]],
).get_converted(),
Month2Unified(
matched[parse_case["positions"]["month"]]
).get_converted(),
str(matched[parse_case["positions"]["year"]]),
)
if date_parts:
return "-".join(date_parts)
return None
[docs] @ConverterBase.ensure_data_to_convert_is_given
def get_converted(self) -> Optional[str]:
"""
Provides the expiration date of the record (if found).
"""
expiration_date_line = self.__get_line()
if expiration_date_line:
expiration_date = expiration_date_line[0].strip()
if RegexHelper(self.REGEX_DIGITS).match(
expiration_date, return_match=False
):
return self.__get_actual_expiration_date(expiration_date)
return None