from operator import xor

from tld import get_tld


def reduce_url(url, scheme=False, netloc=False, path=False, query=False,
               fragment=False, tld=False, return_list=False, sep=''):
    """
    Return selected parts of a URL as a joined string or as a list.

    **If no option is set, or the URL cannot be parsed, an empty string is
    returned (even when `return_list` is true).**

    The URL parts (scheme, netloc, path, query, fragment) and the `tld`
    option are mutually exclusive: either one or more URL parts are
    requested, or only the TLD is requested.

    Parameters:
        url (str): The URL to reduce.
        scheme (bool) : (optional) If true the scheme is included
            (e.g. http). Raises an error if tld is also true!
        netloc (bool) : (optional) If true the netloc is included
            (e.g. mail.google.com). Raises an error if tld is also true!
        path (bool) : (optional) If true the path is included
            (e.g. /foo/bar). Raises an error if tld is also true!
        query (bool) : (optional) If true the query is included
            (e.g. foo=true&bar=false). Raises an error if tld is also true!
            NOTE(review): the value is read from `parsed_url.query`; for a
            urllib `SplitResult` this omits the leading '?' - confirm.
        fragment (bool) : (optional) If true the fragment is included
            (e.g. a -> anchor point on the site). Raises an error if tld is
            also true!
            NOTE(review): the value is read from `parsed_url.fragment`; for
            a urllib `SplitResult` this omits the leading '#' - confirm.
        tld (bool) : (optional) If true only the Top-Level-Domain is
            returned (e.g. com). Must be a bool or an int. Raises an error
            if any other option is true!
        return_list (bool) : (default=False) If true a list of strings is
            returned, otherwise a single joined string. Ignored when `tld`
            is requested: the TLD is always returned as a string.
        sep (string) : (default='') The separator used to join the selected
            parts into a single string.

    Returns:
        reduced (list | string): The requested parts in the fixed order
        scheme, netloc, path, query, fragment - as a list if `return_list`
        is true, otherwise joined with `sep`. For `tld` the TLD string.
        An empty string if nothing was requested or the URL is invalid.

    Raises:
        AssertionError: If `tld` is not a bool/int, if `sep` is not a
            string, or if `tld` is combined with any other URL part.

    Example
    ---
    To reduce a given URL call the function and pass the parts that should
    be extracted as a boolean or a number greater than zero.
    ```
    url = 'https://example.com/foo/bar?querystring#fragment'

    # get the netloc and path as a joined string
    # output: 'example.com/foo/bar'
    print(reduce_url(url, netloc=1, path=1))

    # get the tld.
    # output: 'com'
    print(reduce_url(url, tld=True))
    ```
    """
    wants_parts = any((scheme, netloc, path, query, fragment))

    # If no option is chosen return an empty string.
    if not (wants_parts or tld):
        return ""

    # Validate with explicit raises instead of `assert` statements so the
    # checks survive `python -O`; AssertionError is kept so existing callers
    # that catch it continue to work. `bool` is a subclass of `int`, so one
    # isinstance check covers both accepted types.
    if not isinstance(tld, int):
        raise AssertionError("tld must be a bool or an int")
    if not isinstance(sep, str):
        raise AssertionError("sep must be a str")

    # Can not reduce to a part of the URL and the tld at the same time. Even
    # though the tld is part of the netloc, both cases are kept separate.
    # Both operands are coerced to bool first: `xor` is bitwise on ints, so
    # e.g. netloc=1 with tld=2 would otherwise yield 3 and slip through.
    if not xor(wants_parts, bool(tld)):
        raise AssertionError(
            "tld can not be combined with scheme, netloc, path, query or "
            "fragment"
        )

    if tld:
        # With fail_silently=True get_tld yields None for an invalid URL;
        # normalise that to the documented empty string.
        return get_tld(url, fail_silently=True) or ""

    tld_result = get_tld(url, as_object=True, fail_silently=True)
    if tld_result is None:
        return ""

    parsed = tld_result.parsed_url
    # Pair each flag with its value; the tuple order fixes the output order.
    candidates = (
        (scheme, parsed.scheme),
        (netloc, parsed.netloc),
        (path, parsed.path),
        (query, parsed.query),
        (fragment, parsed.fragment),
    )
    reduced = [value for wanted, value in candidates if wanted]

    return reduced if return_list else sep.join(reduced)