diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..1e4d7be54f413a1bf6112dbed17c578f69c6bea7 Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index 37fc9d40817580a7922e9f71772922e58465b2b1..81cc48e521bef9bac816ab4e4b0ddae62594384d 100644 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,16 @@ ENV/ # Rope project settings .ropeproject + +# Jupyternotebooks +*.ipynb + +# My Data +/Data + +# Vs code Setting +.vscode/ + +# pytest +.pytest_cache + diff --git a/README.rst b/README.rst index 30f0e7faca50980dd6435dcfa8dd6480293da140..c930acfddcced0092ef36525c9774a885e2e7d0e 100644 --- a/README.rst +++ b/README.rst @@ -1,10 +1,4 @@ -Sample Module Repository +Formula One Module Repository ======================== -This simple project is an example repo for Python projects. - -`Learn more <http://www.kennethreitz.org/essays/repository-structure-and-python>`_. - ---------------- - -If you want to learn more about ``setup.py`` files, check out `this repository <https://github.com/kennethreitz/setup.py>`_. +This Python project downloads and prepares Formula One data from the Ergast API (ergast.com). diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..d0adb4c99d4096bdb28a9d15471afd72997028cb Binary files /dev/null and b/docs/.DS_Store differ diff --git a/docs/API.rst b/docs/API.rst new file mode 100644 index 0000000000000000000000000000000000000000..7e329883b1d51fc761818959a8a291c7298a489c --- /dev/null +++ b/docs/API.rst @@ -0,0 +1,7 @@ +API +=== + +.. autosummary:: + :toctree: generated + + formulaone.helpers \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index ac03bd0971fbc0d09e3b54063518e452ad49c033..97f4245617b234bf607a2a4936583284ea5db525 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,21 +11,26 @@ # All configuration values have a default; values that are commented out # serve to show the default. 
-import sys, os - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) +# conf.py lives in docs/, so parents[1] is the repository root holding the formulaone package. +import pathlib +import sys +sys.path.insert(0, pathlib.Path(__file__).parents[1].resolve().as_posix()) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [] +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary' +] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -34,7 +39,7 @@ templates_path = ['_templates'] source_suffix = '.rst' # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' @@ -54,37 +59,37 @@ release = 'v0.0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. 
cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- @@ -96,26 +101,26 @@ html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # "<project> v<release> documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, @@ -124,44 +129,44 @@ html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a <link> tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. 
htmlhelp_basename = 'sampledoc' @@ -170,42 +175,42 @@ htmlhelp_basename = 'sampledoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', + # The paper size ('letterpaper' or 'a4paper'). + # 'papersize': 'letterpaper', -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # Additional stuff for the LaTeX preamble. + # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'sample.tex', u'sample Documentation', - u'Kenneth Reitz', 'manual'), + ('index', 'sample.tex', u'sample Documentation', + u'Kenneth Reitz', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- @@ -218,7 +223,7 @@ man_pages = [ ] # If true, show URL addresses after external links. 
-#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ @@ -227,16 +232,16 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'sample', u'sample Documentation', - u'Kenneth Reitz', 'sample', 'One line description of project.', - 'Miscellaneous'), + ('index', 'sample', u'sample Documentation', + u'Kenneth Reitz', 'sample', 'One line description of project.', + 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' diff --git a/docs/generated/sample.helpers.rst b/docs/generated/sample.helpers.rst new file mode 100644 index 0000000000000000000000000000000000000000..c75d5e614a7ed7854e0f49551b7ebc7e824420d2 --- /dev/null +++ b/docs/generated/sample.helpers.rst @@ -0,0 +1,31 @@ +sample.helpers +============== + +.. automodule:: sample.helpers + + + + + + + + .. rubric:: Functions + + .. autosummary:: + + get_path_to_data + get_raw_data_path + get_tidy_data_path + + + + + + + + + + + + + diff --git a/docs/generated/sample.rst b/docs/generated/sample.rst new file mode 100644 index 0000000000000000000000000000000000000000..12cd9db926808d4a390dd8c57f04ef9906128537 --- /dev/null +++ b/docs/generated/sample.rst @@ -0,0 +1,23 @@ +sample +====== + +.. automodule:: sample + + + + + + + + + + + + + + + + + + + diff --git a/docs/index.rst b/docs/index.rst index 46f3fe8e6de1ca94e78536fb2d20ce9934c76fb8..e9dd9c418194927157447103c228a2092d53e8f5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,14 +9,6 @@ Welcome to sample's documentation! Contents: .. 
toctree:: - :maxdepth: 2 - - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - + + usage + API diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 0000000000000000000000000000000000000000..bd80d08f543d5f97fc5f1621085ff0fd23129a34 --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,17 @@ +Usage +===== + +Helpers +------------ + +you can use the ``formulaone.helpers.get_path_to_data()`` function: + +.. autofunction:: formulaone.helpers.get_path_to_data + +Raw data folder is returned by ``formulaone.helpers.get_raw_data_path()`` function: + +.. autofunction:: formulaone.helpers.get_raw_data_path + +you can also use the ``formulaone.helpers.get_tidy_data_path()`` function: + +.. autofunction:: formulaone.helpers.get_tidy_data_path \ No newline at end of file diff --git a/formulaone/__init__.py b/formulaone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/formulaone/core.py b/formulaone/core.py new file mode 100644 index 0000000000000000000000000000000000000000..d76c319cd908f08e693710c5cb02793253a5ec42 --- /dev/null +++ b/formulaone/core.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- + + +def get_hmm(): + """Get a thought.""" + return 'hmmm...' 
diff --git a/formulaone/helpers.py b/formulaone/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..a172cd48e72791e321a5afc7b6d3ec1f8b3c4329 --- /dev/null +++ b/formulaone/helpers.py @@ -0,0 +1,16 @@ +from pathlib import Path + + +def get_path_to_data(): + """Return path to data.""" + return Path('Data/') + + +def get_raw_data_path(): + """Return path to raw data.""" + return get_path_to_data() / 'RawData' + + +def get_tidy_data_path(): + """Return path to tidy data.""" + return get_path_to_data() / 'TidyData' diff --git a/formulaone/load_latest_race.py b/formulaone/load_latest_race.py new file mode 100644 index 0000000000000000000000000000000000000000..eddffc4edd839bd1f78ccb67d25b44573876fcca --- /dev/null +++ b/formulaone/load_latest_race.py @@ -0,0 +1,24 @@ +import json +import requests + +from helpers import get_raw_data_path + +base_url = 'http://ergast.com/api/f1/' +endpoint = 'current/last/results.json' + + +# Bound the request so an unresponsive API cannot hang the script forever. +response = requests.get( + url=base_url + endpoint, + timeout=30 +) +# Stop on HTTP errors (4xx/5xx) instead of saving an error payload as race data. +response.raise_for_status() + +data = response.json() + +raw_data_path = get_raw_data_path() +raw_data_path.mkdir(parents=True, exist_ok=True) + +with open(raw_data_path / 'current.json', 'w') as f: + json.dump(data, f) diff --git a/formulaone/prepare_data.py b/formulaone/prepare_data.py new file mode 100644 index 0000000000000000000000000000000000000000..61ddfb2b42935dd0d1ba3cc2d6d8404310ae7dc4 --- /dev/null +++ b/formulaone/prepare_data.py @@ -0,0 +1,45 @@ +import pandas as pd +from numpy import save +from sklearn.preprocessing import StandardScaler, OneHotEncoder +from sklearn.compose import ColumnTransformer + +from helpers import get_tidy_data_path + +df = pd.read_parquet(get_tidy_data_path() / 'current_race.parquet') + +numerical_columns = [ + 'number', + 'position', + 'points', + 'grid', + 'laps', + 'Time.millis', + 'FastestLap.rank', + 'FastestLap.lap', + 'FastestLap.AverageSpeed.speed' +] + +categorical_columns = [ + 'status', + 'Driver.code', + 'Driver.nationality', + 
'Constructor.constructorId', +] + +df = df[ + numerical_columns + categorical_columns +] + + +full_pipeline = ColumnTransformer( + [ + ('num', StandardScaler(), numerical_columns), + ('cat', OneHotEncoder(), categorical_columns) + ] +) + +current_prepared_array = full_pipeline.fit_transform(df) +save( + get_tidy_data_path() / 'current_race_prepared.npy', + current_prepared_array +) diff --git a/formulaone/tidy_data.py b/formulaone/tidy_data.py new file mode 100644 index 0000000000000000000000000000000000000000..29caf33e9308dd6cdb72f5998602e655ad49653e --- /dev/null +++ b/formulaone/tidy_data.py @@ -0,0 +1,14 @@ +import json +import pandas as pd + +from helpers import get_raw_data_path, get_tidy_data_path + +tidy_data_path = get_tidy_data_path() +tidy_data_path.mkdir(parents=True, exist_ok=True) + +with open(get_raw_data_path() / 'current.json', 'r') as f: + d = json.load(f) + +df = pd.json_normalize(d['MRData']['RaceTable']['Races'][0]['Results']) + +df.to_parquet(tidy_data_path / 'current_race.parquet') diff --git a/requirements.txt b/requirements.txt index e4ea3e222377614277bb6a5b61fec047767f4a03..e73e418b1c978f856a21a008a0fd97fdb4f693a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,8 @@ -nose -sphinx \ No newline at end of file +pytest +sphinx +ipykernel +requests +pandas +pyarrow +fastparquet +scikit-learn \ No newline at end of file diff --git a/sample/__init__.py b/sample/__init__.py deleted file mode 100644 index f4633fa3f1e6a824d617e405ee5d9b26d634c383..0000000000000000000000000000000000000000 --- a/sample/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .core import hmm \ No newline at end of file diff --git a/sample/core.py b/sample/core.py deleted file mode 100644 index 19bf06c155492cb47e0e247cd1768cc1d4db2b31..0000000000000000000000000000000000000000 --- a/sample/core.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- -from . import helpers - -def get_hmm(): - """Get a thought.""" - return 'hmmm...' 
- - -def hmm(): - """Contemplation...""" - if helpers.get_answer(): - print(get_hmm()) diff --git a/sample/helpers.py b/sample/helpers.py deleted file mode 100644 index 63ab898b048c026cc0cf1a65fd8f35215867c81c..0000000000000000000000000000000000000000 --- a/sample/helpers.py +++ /dev/null @@ -1,3 +0,0 @@ -def get_answer(): - """Get an answer.""" - return True diff --git a/setup.py b/setup.py index 12af80686911aae567c92163d62e71a57a13f9dd..859c71ae996b0bad92848fe8c40e5e984bce7e1e 100755 --- a/setup.py +++ b/setup.py @@ -12,14 +12,13 @@ with open('LICENSE') as f: license = f.read() setup( - name='sample', + name='formulaone', version='0.1.0', - description='Sample package for Python-Guide.org', + description='Downloads and prepares formula one data', long_description=readme, - author='Kenneth Reitz', - author_email='me@kennethreitz.com', + author='Timo Schuerg', + author_email='t.schuerg@th-bingen.de', url='https://github.com/kennethreitz/samplemod', license=license, packages=find_packages(exclude=('tests', 'docs')) ) - diff --git a/tests/context.py b/tests/context.py deleted file mode 100644 index 91de701e6098e869568eec74be08049170631ea1..0000000000000000000000000000000000000000 --- a/tests/context.py +++ /dev/null @@ -1,7 +0,0 @@ -# -*- coding: utf-8 -*- - -import sys -import os -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - -import sample diff --git a/tests/test_advanced.py b/tests/test_advanced.py deleted file mode 100644 index cc48248e29df65acef554c1106b5b3de30140396..0000000000000000000000000000000000000000 --- a/tests/test_advanced.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- - -from .context import sample - -import unittest - - -class AdvancedTestSuite(unittest.TestCase): - """Advanced test cases.""" - - def test_thoughts(self): - self.assertIsNone(sample.hmm()) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_basic.py b/tests/test_basic.py index 
d3e4d5ac1e24a19d9c806bbd19f9f6c7d8c90e75..4d0e88683b32552b42b95b2da17a08f0dfac039f 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,16 +1,10 @@ -# -*- coding: utf-8 -*- +# content of test_basic.py -from .context import sample +from formulaone.helpers import get_tidy_data_path +import pandas as pd -import unittest - -class BasicTestSuite(unittest.TestCase): - """Basic test cases.""" - - def test_absolute_truth_and_meaning(self): - assert True - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file +def test_check_dataframe_size(): + """Check that the tidy 'current_race.parquet' data has 26 columns.""" + df = pd.read_parquet(get_tidy_data_path() / 'current_race.parquet') + assert df.shape[1] == 26