"""Python Cookbook

Chapter 10, Examples from the text.

•	Using pathlib to work with file names
•	Replacing a file while preserving the previous version
•	Reading delimited files with the ``csv`` module
•	Using data classes to simplify working with CSV files
•	Reading complex formats using regular expressions
•	Reading JSON and YAML documents
•	Reading XML documents
•	Reading HTML documents
•	Refactoring a csv DictReader to a dataclass reader

Note: Output from this is used in Chapter 4 examples.
"""

# Using pathlib to work with file names

>>> from pathlib import Path

# Stand-in for parsed command-line arguments, so the examples have string
# paths to start from.
>>> from argparse import Namespace
>>> options = Namespace(
...     input='/path/to/some/file.csv',
...     file1='data/ch10_file1.yaml',
...     file2='data/ch10_file2.yaml',
... )

# Build a Path from the string. The PosixPath reprs shown in these examples
# are what a POSIX platform displays.
>>> input_path = Path(options.input)
>>> input_path
PosixPath('/path/to/some/file.csv')

# Same directory and stem, different suffix.
>>> output_path = input_path.with_suffix('.out')
>>> output_path
PosixPath('/path/to/some/file.out')

>>> input_path = Path(options.input)
>>> input_path
PosixPath('/path/to/some/file.csv')

# Split the path into its parent directory and its stem (the final name
# without the suffix).
>>> input_directory = input_path.parent
>>> input_stem = input_path.stem

# Derive a sibling file name by decorating the stem.
>>> output_stem_pass = f"{input_stem}_pass"
>>> output_stem_pass
'file_pass'

# The / operator joins path components; with_suffix() then adds the extension.
>>> output_path = (input_directory / output_stem_pass).with_suffix('.csv')
>>> output_path
PosixPath('/path/to/some/file_pass.csv')

>>> input_path = Path(options.input)
>>> input_path
PosixPath('/path/to/some/file.csv')

# Name an "output" directory next to the input file. This is path arithmetic
# only; no directory is created here.
>>> output_parent = input_path.parent / "output"
>>> output_parent
PosixPath('/path/to/some/output')

# Same stem as the input, placed in the output directory with a new suffix.
>>> input_stem = input_path.stem
>>> output_path = (output_parent / input_stem).with_suffix('.src')

>>> file1_path = Path(options.file1)
>>> file1_path
PosixPath('data/ch10_file1.yaml')
>>> file2_path = Path(options.file2)
>>> file2_path
PosixPath('data/ch10_file2.yaml')

# The following values depend on when the data files were last modified,
# so the examples that display them are marked +SKIP.

>>> file1_path.stat().st_mtime  # doctest: +SKIP
1464445185.0
>>> file2_path.stat().st_mtime  # doctest: +SKIP
1466209804.0

# st_mtime is a numeric timestamp; fromtimestamp() turns it into a datetime.
>>> import datetime
>>> mtime_1 = file1_path.stat().st_mtime
>>> datetime.datetime.fromtimestamp(mtime_1)  # doctest: +SKIP
datetime.datetime(2016, 5, 28, 7, 19, 45)

>>> input_path = Path(options.input)
>>> input_path
PosixPath('/path/to/some/file.csv')

>>> try:
...     input_path.unlink()
... except FileNotFoundError as ex:
...     print("File already deleted")
File already deleted

>>> Path(options.file1)
PosixPath('data/ch10_file1.yaml')
>>> directory_path = Path(options.file1).parent
>>> directory_path
PosixPath('data')

>>> list(directory_path.glob("*.csv"))  # doctest: +NORMALIZE_WHITESPACE
[PosixPath('data/wc1.csv'), PosixPath('data/ex2_r12.csv'),
 PosixPath('data/wc.csv'), PosixPath('data/ch07_r13.csv'),
 PosixPath('data/sample.csv'),
 PosixPath('data/craps.csv'), PosixPath('data/output.csv'),
 PosixPath('data/fuel.csv'), PosixPath('data/waypoints.csv'),
 PosixPath('data/quotient.csv'),
 PosixPath('data/summary_log.csv'), PosixPath('data/fuel2.csv')]

# PureWindowsPath applies Windows path rules regardless of the platform the
# example runs on: the repr uses forward slashes, str() uses backslashes.
>>> from pathlib import PureWindowsPath
>>> home_path = PureWindowsPath(r'C:\Users\slott')
>>> name_path = home_path / 'filename.ini'
>>> name_path
PureWindowsPath('C:/Users/slott/filename.ini')
>>> str(name_path)
'C:\\Users\\slott\\filename.ini'

# Reading complex formats using regular expressions

>>> import re

# The pattern is compiled with re.X (verbose mode), so the literal spaces used
# to align the pieces are ignored; whitespace in the data is matched explicitly.
# Each named group captures one field of the log line.
>>> pattern_text = (
...        r"\[   (?P<date>.*?)  \]\s+"
...        r"     (?P<level>\w+)   \s+"
...        r"in\s+(?P<module>.+?)"
...        r":\s+ (?P<message>.+)"
... )
>>> pattern = re.compile(pattern_text, re.X)

# groups() returns the captured fields positionally; groupdict() keys them by
# the group names.
>>> sample_data = '[2016-05-08 11:08:18,651] INFO in ch10_r09: Sample Message One'
>>> match = pattern.match(sample_data)
>>> match.groups()
('2016-05-08 11:08:18,651', 'INFO', 'ch10_r09', 'Sample Message One')
>>> match.groupdict()  # doctest: +NORMALIZE_WHITESPACE
{'date': '2016-05-08 11:08:18,651',
'level': 'INFO',
'module': 'ch10_r09',
'message': 'Sample Message One'}


# Reading JSON and YAML documents

>>> import json

>>> from pathlib import Path
>>> source_path = Path("data/race_result.json")
>>> document = json.loads(source_path.read_text())


>>> document['teams'][6]['name']
'Team Vestas Wind'

>>> document['legs'][5]
'ITAJAÍ - NEWPORT'


>>> import datetime
>>> example_date = datetime.datetime(2014, 6, 7, 8, 9, 10)
>>> document = {'date': example_date}

>>> json.dumps(document)
Traceback (most recent call last):
  File "/Users/slott/miniconda3/envs/cookbook/lib/python3.8/doctest.py", line 1328, in __run
    compileflags, 1), test.globs)
  File "<doctest examples.txt[73]>", line 1, in <module>
    json.dumps(document)
  File "/Users/slott/miniconda3/envs/cookbook/lib/python3.8/json/__init__.py", line 231, in dumps
    return _default_encoder.encode(obj)
  File "/Users/slott/miniconda3/envs/cookbook/lib/python3.8/json/encoder.py", line 199, in encode
    chunks = self.iterencode(o, _one_shot=True)
  File "/Users/slott/miniconda3/envs/cookbook/lib/python3.8/json/encoder.py", line 257, in iterencode
    return _iterencode(o, 0)
  File "/Users/slott/miniconda3/envs/cookbook/lib/python3.8/json/encoder.py", line 179, in default
    raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type datetime is not JSON serializable

>>> document_converted = {'date': example_date.isoformat()}
>>> json.dumps(document_converted)
'{"date": "2014-06-07T08:09:10"}'

>>> from typing import Any, Union, Dict
>>> def default_date(object: Any) -> Union[Any, Dict[str, Any]]:
...     if isinstance(object, datetime.datetime):
...         return {"$date": object.isoformat()}
...     return object

>>> example_date = datetime.datetime(2014, 6, 7, 8, 9, 10)
>>> document = {'date': example_date}
>>> print(
...     json.dumps(document, default=default_date, indent=2))
{
  "date": {
    "$date": "2014-06-07T08:09:10"
  }
}


>>> def as_date(object):
...     if {'$date'} == set(object.keys()):
...         return datetime.datetime.strptime(
...            object['$date'], '%Y-%m-%dT%H:%M:%S')
...     return object

>>> source = '''{"date": {"$date": "2014-06-07T08:09:10"}}'''
>>> json.loads(source, object_hook=as_date)
{'date': datetime.datetime(2014, 6, 7, 8, 9, 10)}

# Reading XML documents

>>> import xml.etree.ElementTree as XML
>>> from pathlib import Path

# Read the file as UTF-8 text and parse it; fromstring() returns the root element.
>>> source_path = Path("data/race_result.xml")
>>> source_text = source_path.read_text(encoding='UTF-8')
>>> document = XML.fromstring(source_text)

# find() returns the first matching child, so this walks to the first team's name.
>>> teams = document.find('teams')
>>> name = teams.find('team').find('name')
>>> name.text.strip()
'Abu Dhabi Ocean Racing'

# findall() takes a path of nested tag names and returns every match.
>>> for tag in document.findall('teams/team/name'):
...      print(tag.text.strip())
Abu Dhabi Ocean Racing
Team Brunel
Dongfeng Race Team
MAPFRE
Team Alvimedica
Team SCA
Team Vestas Wind

# The [@n='8'] predicate keeps only <leg> elements whose n attribute is '8'.
>>> for tag in document.findall("teams/team/position/leg[@n='8']"):
...     print(tag.text.strip())
3
5
7
4
6
1
2


# Reading HTML documents

>>> from bs4 import BeautifulSoup
>>> from pathlib import Path


# Parse the saved page with Python's built-in 'html.parser'; the with
# statement closes the file once the soup has been built.
>>> source_path = Path("data/Volvo Ocean Race.html")
>>> with source_path.open(encoding='utf8') as source_file:
...     soup = BeautifulSoup(source_file, 'html.parser')

>>> def clean_leg(text: str) -> str:
...     leg_soup = BeautifulSoup(text, "html.parser")
...     return leg_soup.text

>>> def get_legs(soup):
...     legs = []
...     thead = soup.table.thead.tr
...     for tag in thead.find_all('th'):
...         if 'data-title' in tag.attrs:
...             leg_description_text = clean_leg(tag.attrs['data-title'])
...             legs.append(leg_description_text)
...     return legs

>>> get_legs(soup)
['ALICANTE - CAPE TOWN', 'CAPE TOWN - ABU DHABI', 'ABU DHABI - SANYA', 'SANYA - AUCKLAND', 'AUCKLAND - ITAJAÍ', 'ITAJAÍ - NEWPORT', 'NEWPORT - LISBON', 'LISBON - LORIENT', 'LORIENT - GOTHENBURG']

>>> def get_legs_comprehension(soup):
...     thead = soup.table.thead.tr
...     legs = [
...         clean_leg(tag.attrs['data-title'])
...           for tag in thead.find_all('th')
...             if 'data-title' in tag.attrs
...     ]
...     return legs

>>> get_legs_comprehension(soup)
['ALICANTE - CAPE TOWN', 'CAPE TOWN - ABU DHABI', 'ABU DHABI - SANYA', 'SANYA - AUCKLAND', 'AUCKLAND - ITAJAÍ', 'ITAJAÍ - NEWPORT', 'NEWPORT - LISBON', 'LISBON - LORIENT', 'LORIENT - GOTHENBURG']


# The whitespace between tags is kept as text nodes, so the children of <tr>
# include the newline strings on either side of the <td>.
>>> example = BeautifulSoup('''
...     <tr>
...         <td>data</td>
...     </tr>
... ''', 'html.parser')

>>> list(example.tr.children)
['\n', <td>data</td>, '\n']

>>> ranking_table = soup.find('table', class_="ranking-list")
>>> list(tag.name for tag in ranking_table.parents)
['section', 'div', 'div', 'div', 'div', 'body', 'html', '[document]']
