{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Export Dataset to CSV" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook shows an example of how to export a dataset to a csv file. \n", "It assumes you found the dataset using techniques shown in `finding_datasets.ipynb` and loaded the dataset using `loading_datasets.ipynb`" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "try:\n", " import openpolicedata as opd #This import should be last in the try block because the expect block will only try to load it\n", "except:\n", " import sys\n", " sys.path.append('../openpolicedata')\n", " import openpolicedata as opd" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateSourceNameAgencyTableTypeYearDescriptionDataTypeURLdate_fielddataset_idagency_fieldmin_versionreadme
5MarylandMontgomery CountyMontgomery CountyTRAFFIC STOPSMULTIThis dataset contains traffic violation inform...Socratadata.montgomerycountymd.govdate_of_stop4mse-ku6q<NA><NA>https://data.montgomerycountymd.gov/Public-Saf...
6MarylandMontgomery CountyMontgomery CountyCOMPLAINTSMULTIThis dataset contains allegations brought to t...Socratadata.montgomerycountymd.govcreated_dtusip-62e2<NA><NA>https://data.montgomerycountymd.gov/Public-Saf...
\n", "
" ], "text/plain": [ " State SourceName Agency TableType Year \\\n", "5 Maryland Montgomery County Montgomery County TRAFFIC STOPS MULTI \n", "6 Maryland Montgomery County Montgomery County COMPLAINTS MULTI \n", "\n", " Description DataType \\\n", "5 This dataset contains traffic violation inform... Socrata \n", "6 This dataset contains allegations brought to t... Socrata \n", "\n", " URL date_field dataset_id agency_field \\\n", "5 data.montgomerycountymd.gov date_of_stop 4mse-ku6q \n", "6 data.montgomerycountymd.gov created_dt usip-62e2 \n", "\n", " min_version readme \n", "5 https://data.montgomerycountymd.gov/Public-Saf... \n", "6 https://data.montgomerycountymd.gov/Public-Saf... " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# To access the data, create a source using a Source Name (usually a police department name). There is an optional state input to clarify ambiguities.\n", "# We will use the above cell's information for Maryland to choose the agency \"Montgomery County\" which we select for the source_name\n", "\n", "src = opd.Source(source_name=\"Montgomery County\", state=\"Maryland\")\n", "src.datasets.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Load traffic stop data for 2021\n", "t = src.load(table_type='TRAFFIC STOPS', year=2021)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
geometryseq_iddate_of_stoptime_of_stopagencysubagencydescriptionlocationlatitudelongitude...driver_statedl_statearrest_typesearch_conductedsearch_outcomesearch_reason_for_stopsearch_dispositionsearch_reasonsearch_typesearch_arrest_reason
0POINT (-77.27504 39.14653)123add05-d3d2-428d-9932-66bc308313882021-01-0123:03:00MCP5th District, GermantownDISPLAYING EXPIRED REGISTRATION PLATE ISSUED B...GREAT SENECA @ WSSC ENTRANCE39.1465333333333-77.2750433333333...MDMDQ - Marked LaserNaNNaNNaNNaNNaNNaNNaN
1POINT (-77.27504 39.14653)123add05-d3d2-428d-9932-66bc308313882021-01-0123:03:00MCP5th District, GermantownEXCEEDING POSTED MAXIMUM SPEED LIMIT: 64 MPH I...GREAT SENECA @ WSSC ENTRANCE39.1465333333333-77.2750433333333...MDMDQ - Marked LaserNaNNaNNaNNaNNaNNaNNaN
2POINT (-77.27504 39.14653)123add05-d3d2-428d-9932-66bc308313882021-01-0123:03:00MCP5th District, GermantownKNOWINGLY DRIVING UNINSURED VEHICLEGREAT SENECA @ WSSC ENTRANCE39.1465333333333-77.2750433333333...MDMDQ - Marked LaserNaNNaNNaNNaNNaNNaNNaN
3POINT (-77.27285 39.14366)1b7c9229-d80f-4ed2-9692-d24a6fbda5c72021-01-0122:43:00MCP5th District, GermantownDRIVING VEHICLE IN EXCESS OF REASONABLE AND PR...GREAT SENECA @ HORN POINT39.1436583333333-77.2728533333333...MDMDA - Marked PatrolNoWarning21-801(a)NaNNaNNaNNaN
4POINT (-77.27405 39.17419)0c6f50ae-d462-4356-8319-e1f035dc00fc2021-01-0122:20:00MCP5th District, GermantownDRIVER CHANGING LANES WHEN UNSAFE118 @ WALTERJOHNSON39.174195-77.274045...MDMDA - Marked PatrolNoWarning21-309(b)NaNNaNNaNNaN
\n", "

5 rows × 43 columns

\n", "
" ], "text/plain": [ " geometry seq_id \\\n", "0 POINT (-77.27504 39.14653) 123add05-d3d2-428d-9932-66bc30831388 \n", "1 POINT (-77.27504 39.14653) 123add05-d3d2-428d-9932-66bc30831388 \n", "2 POINT (-77.27504 39.14653) 123add05-d3d2-428d-9932-66bc30831388 \n", "3 POINT (-77.27285 39.14366) 1b7c9229-d80f-4ed2-9692-d24a6fbda5c7 \n", "4 POINT (-77.27405 39.17419) 0c6f50ae-d462-4356-8319-e1f035dc00fc \n", "\n", " date_of_stop time_of_stop agency subagency \\\n", "0 2021-01-01 23:03:00 MCP 5th District, Germantown \n", "1 2021-01-01 23:03:00 MCP 5th District, Germantown \n", "2 2021-01-01 23:03:00 MCP 5th District, Germantown \n", "3 2021-01-01 22:43:00 MCP 5th District, Germantown \n", "4 2021-01-01 22:20:00 MCP 5th District, Germantown \n", "\n", " description \\\n", "0 DISPLAYING EXPIRED REGISTRATION PLATE ISSUED B... \n", "1 EXCEEDING POSTED MAXIMUM SPEED LIMIT: 64 MPH I... \n", "2 KNOWINGLY DRIVING UNINSURED VEHICLE \n", "3 DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR... \n", "4 DRIVER CHANGING LANES WHEN UNSAFE \n", "\n", " location latitude longitude ... \\\n", "0 GREAT SENECA @ WSSC ENTRANCE 39.1465333333333 -77.2750433333333 ... \n", "1 GREAT SENECA @ WSSC ENTRANCE 39.1465333333333 -77.2750433333333 ... \n", "2 GREAT SENECA @ WSSC ENTRANCE 39.1465333333333 -77.2750433333333 ... \n", "3 GREAT SENECA @ HORN POINT 39.1436583333333 -77.2728533333333 ... \n", "4 118 @ WALTERJOHNSON 39.174195 -77.274045 ... \n", "\n", " driver_state dl_state arrest_type search_conducted search_outcome \\\n", "0 MD MD Q - Marked Laser NaN NaN \n", "1 MD MD Q - Marked Laser NaN NaN \n", "2 MD MD Q - Marked Laser NaN NaN \n", "3 MD MD A - Marked Patrol No Warning \n", "4 MD MD A - Marked Patrol No Warning \n", "\n", " search_reason_for_stop search_disposition search_reason search_type \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 21-801(a) NaN NaN NaN \n", "4 21-309(b) NaN NaN NaN \n", "\n", " search_arrest_reason \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", "[5 rows x 43 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the first 5 rows of the table\n", "t.table.head(n=5)\n", "# Now you are ready for analyzing the data in the table t." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The CSV file will be written to c:\\Users\\matth\\repos\\opd-examples. Make sure this path is okay before running the next cell. If the path is not okay then modify csv_filepath.\n" ] } ], "source": [ "import os\n", "cwd = os.getcwd()\n", "csv_filepath = cwd\n", "\n", "print(f\"The CSV file will be written to {csv_filepath}. Make sure this path is okay before running the next cell. If the path is not okay then modify csv_filepath.\")\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The CSV file was written to c:\\Users\\matth\\repos\\opd-examples\\Maryland_Montgomery_County_TRAFFIC_STOPS_2021.csv.\n" ] } ], "source": [ "# Save to CSV. To specify a custom filename, set the filename input\n", "csv_written_filename=t.to_csv(output_dir=csv_filepath)\n", "print(f\"The CSV file was written to {csv_written_filename}.\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
geometryseq_iddate_of_stoptime_of_stopagencysubagencydescriptionlocationlatitudelongitude...driver_statedl_statearrest_typesearch_conductedsearch_outcomesearch_reason_for_stopsearch_dispositionsearch_reasonsearch_typesearch_arrest_reason
0POINT (-77.2750433333333 39.1465333333333)123add05-d3d2-428d-9932-66bc308313882021-01-0123:03:00MCP5th District, GermantownDISPLAYING EXPIRED REGISTRATION PLATE ISSUED B...GREAT SENECA @ WSSC ENTRANCE39.146533-77.275043...MDMDQ - Marked LaserNaNNaNNaNNaNNaNNaNNaN
1POINT (-77.2750433333333 39.1465333333333)123add05-d3d2-428d-9932-66bc308313882021-01-0123:03:00MCP5th District, GermantownEXCEEDING POSTED MAXIMUM SPEED LIMIT: 64 MPH I...GREAT SENECA @ WSSC ENTRANCE39.146533-77.275043...MDMDQ - Marked LaserNaNNaNNaNNaNNaNNaNNaN
2POINT (-77.2750433333333 39.1465333333333)123add05-d3d2-428d-9932-66bc308313882021-01-0123:03:00MCP5th District, GermantownKNOWINGLY DRIVING UNINSURED VEHICLEGREAT SENECA @ WSSC ENTRANCE39.146533-77.275043...MDMDQ - Marked LaserNaNNaNNaNNaNNaNNaNNaN
3POINT (-77.2728533333333 39.1436583333333)1b7c9229-d80f-4ed2-9692-d24a6fbda5c72021-01-0122:43:00MCP5th District, GermantownDRIVING VEHICLE IN EXCESS OF REASONABLE AND PR...GREAT SENECA @ HORN POINT39.143658-77.272853...MDMDA - Marked PatrolNoWarning21-801(a)NaNNaNNaNNaN
4POINT (-77.274045 39.174195)0c6f50ae-d462-4356-8319-e1f035dc00fc2021-01-0122:20:00MCP5th District, GermantownDRIVER CHANGING LANES WHEN UNSAFE118 @ WALTERJOHNSON39.174195-77.274045...MDMDA - Marked PatrolNoWarning21-309(b)NaNNaNNaNNaN
\n", "

5 rows × 43 columns

\n", "
" ], "text/plain": [ " geometry \\\n", "0 POINT (-77.2750433333333 39.1465333333333) \n", "1 POINT (-77.2750433333333 39.1465333333333) \n", "2 POINT (-77.2750433333333 39.1465333333333) \n", "3 POINT (-77.2728533333333 39.1436583333333) \n", "4 POINT (-77.274045 39.174195) \n", "\n", " seq_id date_of_stop time_of_stop agency \\\n", "0 123add05-d3d2-428d-9932-66bc30831388 2021-01-01 23:03:00 MCP \n", "1 123add05-d3d2-428d-9932-66bc30831388 2021-01-01 23:03:00 MCP \n", "2 123add05-d3d2-428d-9932-66bc30831388 2021-01-01 23:03:00 MCP \n", "3 1b7c9229-d80f-4ed2-9692-d24a6fbda5c7 2021-01-01 22:43:00 MCP \n", "4 0c6f50ae-d462-4356-8319-e1f035dc00fc 2021-01-01 22:20:00 MCP \n", "\n", " subagency \\\n", "0 5th District, Germantown \n", "1 5th District, Germantown \n", "2 5th District, Germantown \n", "3 5th District, Germantown \n", "4 5th District, Germantown \n", "\n", " description \\\n", "0 DISPLAYING EXPIRED REGISTRATION PLATE ISSUED B... \n", "1 EXCEEDING POSTED MAXIMUM SPEED LIMIT: 64 MPH I... \n", "2 KNOWINGLY DRIVING UNINSURED VEHICLE \n", "3 DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR... \n", "4 DRIVER CHANGING LANES WHEN UNSAFE \n", "\n", " location latitude longitude ... driver_state \\\n", "0 GREAT SENECA @ WSSC ENTRANCE 39.146533 -77.275043 ... MD \n", "1 GREAT SENECA @ WSSC ENTRANCE 39.146533 -77.275043 ... MD \n", "2 GREAT SENECA @ WSSC ENTRANCE 39.146533 -77.275043 ... MD \n", "3 GREAT SENECA @ HORN POINT 39.143658 -77.272853 ... MD \n", "4 118 @ WALTERJOHNSON 39.174195 -77.274045 ... MD \n", "\n", " dl_state arrest_type search_conducted search_outcome \\\n", "0 MD Q - Marked Laser NaN NaN \n", "1 MD Q - Marked Laser NaN NaN \n", "2 MD Q - Marked Laser NaN NaN \n", "3 MD A - Marked Patrol No Warning \n", "4 MD A - Marked Patrol No Warning \n", "\n", " search_reason_for_stop search_disposition search_reason search_type \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 21-801(a) NaN NaN NaN \n", "4 21-309(b) NaN NaN NaN \n", "\n", " search_arrest_reason \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", "[5 rows x 43 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# To load data back in from CSV, create a new source and use load_from_csv\n", "# load_from_csv usage is similar to load except for the output_dir\n", "# input\n", "src = opd.Source(source_name=\"Montgomery County\", state=\"Maryland\")\n", "t = src.load_from_csv(year=2021, table_type='TRAFFIC STOPS', output_dir=csv_filepath)\n", "t.table.head()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.12 ('opd')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "a73158d29711b2da05ac73de25b71e5d8cae591f14917bba77a9573b5c85a0ce" } } }, "nbformat": 4, "nbformat_minor": 2 }