# Source code for PyFunceble.extractor.expiration_date

"""
The tool to check the availability or syntax of domains, IPv4, IPv6 or URL.

::


    ██████╗ ██╗   ██╗███████╗██╗   ██╗███╗   ██╗ ██████╗███████╗██████╗ ██╗     ███████╗
    ██╔══██╗╚██╗ ██╔╝██╔════╝██║   ██║████╗  ██║██╔════╝██╔════╝██╔══██╗██║     ██╔════╝
    ██████╔╝ ╚████╔╝ █████╗  ██║   ██║██╔██╗ ██║██║     █████╗  ██████╔╝██║     █████╗
    ██╔═══╝   ╚██╔╝  ██╔══╝  ██║   ██║██║╚██╗██║██║     ██╔══╝  ██╔══██╗██║     ██╔══╝
    ██║        ██║   ██║     ╚██████╔╝██║ ╚████║╚██████╗███████╗██████╔╝███████╗███████╗
    ╚═╝        ╚═╝   ╚═╝      ╚═════╝ ╚═╝  ╚═══╝ ╚═════╝╚══════╝╚═════╝ ╚══════╝╚══════╝

Provides the expiration date extractor.

Author:
    Nissar Chababy, @funilrys, contactTATAfunilrysTODTODcom

Special thanks:
    https://pyfunceble.github.io/special-thanks.html

Contributors:
    https://pyfunceble.github.io/contributors.html

Project link:
    https://github.com/funilrys/PyFunceble

Project documentation:
    https://pyfunceble.readthedocs.io/en/master/

Project homepage:
    https://pyfunceble.github.io/

License:
::


    MIT License

    Copyright (c) 2017, 2018, 2019, 2020 Nissar Chababy

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in all
    copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    SOFTWARE.
"""

import PyFunceble.converter as converter
import PyFunceble.helpers as helpers

from .base import ExtractorBase


class ExpirationDate(ExtractorBase):
    """
    Provides a way to extract the expiration date from a given WHOIS record.

    The extraction runs once, at construction time, and its outcome is
    stored into :code:`extracted_data`.

    :param str data: The WHOIS record.
    """

    # All known WHOIS labels which introduce an expiration date.
    # Each pattern captures whatever follows the label, that is, the still
    # unformatted expiration date.
    #
    # Note: the patterns are tried in the declared order, the order is
    # therefore part of the behavior.
    expiration_patterns = [
        r"expire:(.*)",
        r"expire on:(.*)",
        r"Expiry Date:(.*)",
        r"free-date(.*)",
        r"expires:(.*)",
        r"Expiration date:(.*)",
        r"Expiry date:(.*)",
        r"Expire Date:(.*)",
        r"renewal date:(.*)",
        r"Expires:(.*)",
        r"validity:(.*)",
        r"Expiration Date :(.*)",
        r"Expiry :(.*)",
        r"expires at:(.*)",
        r"domain_datebilleduntil:(.*)",
        r"Data de expiração \/ Expiration Date \(dd\/mm\/yyyy\):(.*)",
        r"Fecha de expiración \(Expiration date\):(.*)",
        r"\[Expires on\](.*)",
        r"Record expires on(.*)(\(YYYY-MM-DD\))",
        r"status: OK-UNTIL(.*)",
        r"renewal:(.*)",
        r"expires............:(.*)",
        r"expire-date:(.*)",
        r"Exp date:(.*)",
        r"Valid-date(.*)",
        r"Expires On:(.*)",
        r"Fecha de vencimiento:(.*)",
        r"Expiration:.........(.*)",
        r"Fecha de Vencimiento:(.*)",
        r"Registry Expiry Date:(.*)",
        r"Expires on..............:(.*)",
        r"Expiration Time:(.*)",
        r"Expiration Date:(.*)",
        r"Expired:(.*)",
        r"Date d'expiration:(.*)",
        r"expiration date:(.*)",
    ]

    # The regex which matches a digit.
    # A captured value without any digit can not be a date.
    regex_numbers = r"[0-9]"

    # We map the different possible date formats.
    # The key of each regex is a unique number which is reported to the
    # __format_management method in order to find the right group order.
    #
    # Note: the regexes are tried in the declared order, the order is
    # therefore part of the behavior.
    regex_dates = {
        # Date in format: 02-jan-2017
        "1": r"([0-9]{2})-([a-z]{3})-([0-9]{4})",
        # Date in format: 02.01.2017 // Month: jan
        "2": r"([0-9]{2})\.([0-9]{2})\.([0-9]{4})$",
        # Date in format: 02/01/2017 // Month: jan
        "3": r"([0-3][0-9])\/(0[1-9]|1[012])\/([0-9]{4})",
        # Date in format: 2017-01-02 // Month: jan
        "4": r"([0-9]{4})-([0-9]{2})-([0-9]{2})$",
        # Date in format: 2017.01.02 // Month: jan
        "5": r"([0-9]{4})\.([0-9]{2})\.([0-9]{2})$",
        # Date in format: 2017/01/02 // Month: jan
        "6": r"([0-9]{4})\/([0-9]{2})\/([0-9]{2})$",
        # Date in format: 2017.01.02 15:00:00
        "7": r"([0-9]{4})\.([0-9]{2})\.([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: 20170102 15:00:00 // Month: jan
        "8": r"([0-9]{4})([0-9]{2})([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: 2017-01-02 15:00:00 // Month: jan
        "9": r"([0-9]{4})-([0-9]{2})-([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: 02.01.2017 15:00:00 // Month: jan
        "10": r"([0-9]{2})\.([0-9]{2})\.([0-9]{4})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: 02-Jan-2017 15:00:00 UTC
        "11": r"([0-9]{2})-([A-Z]{1}[a-z]{2})-([0-9]{4})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]{1}.*",  # pylint: disable=line-too-long
        # Date in format: 2017/01/02 01:00:00 (+0900) // Month: jan
        "12": r"([0-9]{4})\/([0-9]{2})\/([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s\(.*\)",
        # Date in format: 2017/01/02 01:00:00 // Month: jan
        "13": r"([0-9]{4})\/([0-9]{2})\/([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}$",
        # Date in format: Mon Jan 02 15:00:00 GMT 2017
        "14": r"[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[A-Z]{3}\s([0-9]{4})",  # pylint: disable=line-too-long
        # Date in format: Mon Jan 02 2017
        "15": r"[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{2})\s([0-9]{4})",
        # Date in format: 2017-01-02T15:00:00 // Month: jan
        "16": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}$",
        # Date in format: 2017-01-02T15:00:00Z // Month: jan
        "17": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}[A-Z].*",
        # Date in format: 2017-01-02T15:00:00+0200 // Month: jan
        "18": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{4}",
        # Date in format: 2017-01-02T15:00:00.622265+03:00 //
        # Month: jan
        "19": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9].*[+-][0-9]{2}:[0-9]{2}",  # pylint: disable=line-too-long
        # Date in format: 2017-01-02T15:00:00.622265 // Month: jan
        "20": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}$",
        # Date in format: 2017-01-02T23:59:59.0Z // Month: jan
        "21": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9].*[A-Z]",
        # Date in format: 02-01-2017 // Month: jan
        "22": r"([0-9]{2})-([0-9]{2})-([0-9]{4})",
        # Date in format: 2017. 01. 02. // Month: jan
        "23": r"([0-9]{4})\.\s([0-9]{2})\.\s([0-9]{2})\.",
        # Date in format: 2017-01-02T00:00:00+13:00 // Month: jan
        "24": r"([0-9]{4})-([0-9]{2})-([0-9]{2})T[0-9]{2}:[0-9]{2}:[0-9]{2}[+-][0-9]{2}:[0-9]{2}",  # pylint: disable=line-too-long
        # Date in format: 20170102 // Month: jan
        "25": r"(?=[0-9]{8})(?=([0-9]{4})([0-9]{2})([0-9]{2}))",
        # Date in format: 02-Jan-2017
        "26": r"([0-9]{2})-([A-Z]{1}[a-z]{2})-([0-9]{4})$",
        # Date in format: 02.1.2017 // Month: jan
        "27": r"([0-9]{2})\.([0-9]{1})\.([0-9]{4})",
        # Date in format: 02 Jan 2017
        "28": r"([0-9]{1,2})\s([A-Z]{1}[a-z]{2})\s([0-9]{4})",
        # Date in format: 02-January-2017
        "29": r"([0-9]{2})-([A-Z]{1}[a-z]*)-([0-9]{4})",
        # Date in format: 2017-Jan-02.
        "30": r"([0-9]{4})-([A-Z]{1}[a-z]{2})-([0-9]{2})\.",
        # Date in format: Mon Jan 02 15:00:00 2017
        "31": r"[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{1,2})\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s([0-9]{4})",  # pylint: disable=line-too-long
        # Date in format: Mon Jan 2017 15:00:00
        # Note: there is no day in this format, the leading empty group
        # only keeps the group indexes aligned with the other formats.
        "32": r"()[a-zA-Z]{3}\s([a-zA-Z]{3})\s([0-9]{4})\s[0-9]{2}:[0-9]{2}:[0-9]{2}",
        # Date in format: January 02 2017
        "33": r"([A-Z]{1}[a-z]*)\s([0-9]{1,2})\s([0-9]{4})",
        # Date in format: 2.1.2017 // Month: jan
        "34": r"([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{4})",
        # Date in format: 20170102000000 // Month: jan
        "35": r"([0-9]{4})([0-9]{2})([0-9]{2})[0-9]+",
        # Date in format: 01/02/2017 // Month: jan
        "36": r"(0[1-9]|1[012])\/([0-3][0-9])\/([0-9]{4})",
        # Date in format: January  2 2017
        # (two whitespace characters between the month and the day)
        "37": r"([A-Z]{1}[a-z].*)\s\s([0-9]{1,2})\s([0-9]{4})",
        # Date in format: 2nd January 2017
        "38": r"([0-9]{1,})[a-z]{1,}\s([A-Z].*)\s(2[0-9]{3})",
    }

    # We map our regex numbers with the right group order.
    # Each case is in format: [[regex numbers], [day, month, year indexes]].
    # Note: please report to the __format_management method note for more
    # information about the mapping.
    format_cases = {
        "first": [[1, 2, 3, 10, 11, 22, 26, 27, 28, 29, 32, 34, 38], [0, 1, 2]],
        "second": [[14, 15, 31, 33, 36, 37], [1, 0, 2]],
        "third": [
            [4, 5, 6, 7, 8, 9, 12, 13, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 35],
            [2, 1, 0],
        ],
    }

    def __init__(self, data):
        # NOTE(review): self.data, which is read by __extract_it, is
        # presumably set by ExtractorBase.__init__ -- confirm against base.
        super().__init__(data)

        self.extracted_data = self.__extract_it()

    def __format_management(self, index, matched):
        """
        A format management helper.

        .. note::
            Please note that the second value of the case represent the groups
            in order :code:`[day,month,year]`.

            This means that a :code:`[2,1,0]` will be for example for a date
            in format :code:`2017-01-02` where :code:`01` is the month.

        :param int index: The identifiant of the regex.
        :param list matched: The matched result to format.

        :return:
            A list representing the expiration date.
            The list can be "decoded" like :code:`[day, month, year]`.

            If the given index is not listed into any case, the given
            :code:`matched` is returned as is.
        :rtype: list
        """

        # The case names are not needed, we only read the case data.
        for case_data in self.format_cases.values():
            regex_ids = case_data[0]
            group_order = case_data[1]

            if int(index) not in regex_ids:
                continue

            # The regex number is into the currently read case data.

            # We return a list with the formatted elements.
            # 1. We convert the day to 2 digits.
            # 2. We convert the month to the unified format.
            # 3. We return the year.
            return [
                converter.Digit2Digits(matched[group_order[0]]).get_converted(),
                converter.Month(matched[group_order[1]]).get_converted(),
                str(matched[group_order[2]]),
            ]

        return matched  # pragma: no cover

    def __format_it(self, data):
        """
        Formats the given data.

        :param str data: The unformatted expiration date.

        :return:
            The parts returned by the format management helper for the first
            matching date regex, joined with :code:`-`.
            :code:`None` if no date regex produced a result.
        :rtype: str|None
        """

        for index, regex in self.regex_dates.items():
            # NOTE(review): with rematch=True the helper presumably returns
            # the list of captured groups -- confirm against helpers.Regex.
            matched = helpers.Regex(regex).match(data, return_match=True, rematch=True)

            if not matched:
                continue

            date = self.__format_management(index, matched)

            if not date:  # pragma: no cover
                continue

            return "-".join(date)

        return None  # pragma: no cover

    def __extract_it(self):
        """
        Try to extract the expiration date from the given data.

        :return:
            The formatted expiration date. :code:`None` if nothing usable
            was found.
        :rtype: str|None
        """

        for regex in self.expiration_patterns:
            captured = helpers.Regex(regex).match(
                self.data, return_match=True, rematch=True, group=0
            )

            if not captured:
                continue

            expiration_date = captured[0].strip()

            # A value without any digit can not be a date, we then try the
            # next pattern. Otherwise, the first value with a digit decides
            # of the result, even if its formatting fails.
            if helpers.Regex(self.regex_numbers).match(
                expiration_date, return_match=True
            ):
                return self.__format_it(expiration_date)

        return None