{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Finding Datasets\n", "This notebook shows how to list the available datasets and filter them by state and table type.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import openpolicedata as opd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateSourceNameAgencyAgencyFullTableTypecoverage_startcoverage_endlast_coverage_checkDescriptionsource_urlreadmeURLYearDataTypedate_fielddataset_idagency_fieldmin_versionquery
0ArizonaChandlerChandlerChandler Police DepartmentARRESTS2018-01-012024-05-0805/09/2024Arrest reports completed by a Chandler Police ...https://data.chandlerpd.com/catalog/arrest-boo...<NA>https://data.chandlerpd.com/catalog/arrest-boo...MULTIPLECSVarrest_date_time<NA><NA>0.2NaN
1ArizonaChandlerChandlerChandler Police DepartmentCALLS FOR SERVICE2018-01-012024-05-0805/09/2024This dataset contains details for all of the c...https://data.chandlerpd.com/catalog/calls-for-...<NA>https://data.chandlerpd.com/catalog/calls-for-...MULTIPLECSVcall_received_date_time<NA><NA><NA>NaN
2ArizonaChandlerChandlerChandler Police DepartmentINCIDENTS2018-01-012024-05-0205/09/2024This dataset contains details for all of the g...https://data.chandlerpd.com/catalog/general-of...<NA>https://data.chandlerpd.com/catalog/general-of...MULTIPLECSVreport_event_date<NA><NA>0.4.1NaN
3ArizonaGilbertGilbertGilbert Police DepartmentCALLS FOR SERVICE2006-11-152024-05-0805/09/2024<NA>https://data.gilbertaz.gov/maps/2dcb4c20c9a444...<NA>https://maps.gilbertaz.gov/arcgis/rest/service...MULTIPLEArcGISEventDate<NA><NA><NA>NaN
4ArizonaGilbertGilbertGilbert Police DepartmentEMPLOYEENaTNaT07/06/2023A data set of all employees that have previous...https://data.gilbertaz.gov/datasets/TOG::gilbe...<NA>https://services1.arcgis.com/JLuzSHjNrLL4Okwb/...NONEArcGIS<NA><NA><NA><NA>NaN
\n", "
" ], "text/plain": [ " State SourceName Agency AgencyFull \\\n", "0 Arizona Chandler Chandler Chandler Police Department \n", "1 Arizona Chandler Chandler Chandler Police Department \n", "2 Arizona Chandler Chandler Chandler Police Department \n", "3 Arizona Gilbert Gilbert Gilbert Police Department \n", "4 Arizona Gilbert Gilbert Gilbert Police Department \n", "\n", " TableType coverage_start coverage_end last_coverage_check \\\n", "0 ARRESTS 2018-01-01 2024-05-08 05/09/2024 \n", "1 CALLS FOR SERVICE 2018-01-01 2024-05-08 05/09/2024 \n", "2 INCIDENTS 2018-01-01 2024-05-02 05/09/2024 \n", "3 CALLS FOR SERVICE 2006-11-15 2024-05-08 05/09/2024 \n", "4 EMPLOYEE NaT NaT 07/06/2023 \n", "\n", " Description \\\n", "0 Arrest reports completed by a Chandler Police ... \n", "1 This dataset contains details for all of the c... \n", "2 This dataset contains details for all of the g... \n", "3 \n", "4 A data set of all employees that have previous... \n", "\n", " source_url readme \\\n", "0 https://data.chandlerpd.com/catalog/arrest-boo... \n", "1 https://data.chandlerpd.com/catalog/calls-for-... \n", "2 https://data.chandlerpd.com/catalog/general-of... \n", "3 https://data.gilbertaz.gov/maps/2dcb4c20c9a444... \n", "4 https://data.gilbertaz.gov/datasets/TOG::gilbe... \n", "\n", " URL Year DataType \\\n", "0 https://data.chandlerpd.com/catalog/arrest-boo... MULTIPLE CSV \n", "1 https://data.chandlerpd.com/catalog/calls-for-... MULTIPLE CSV \n", "2 https://data.chandlerpd.com/catalog/general-of... MULTIPLE CSV \n", "3 https://maps.gilbertaz.gov/arcgis/rest/service... MULTIPLE ArcGIS \n", "4 https://services1.arcgis.com/JLuzSHjNrLL4Okwb/... 
NONE ArcGIS \n", "\n", " date_field dataset_id agency_field min_version query \n", "0 arrest_date_time 0.2 NaN \n", "1 call_received_date_time NaN \n", "2 report_event_date 0.4.1 NaN \n", "3 EventDate NaN \n", "4 NaN " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Query for the entire table of available data as a pandas DataFrame (https://pandas.pydata.org/docs/user_guide/10min.html#min)\n", "# This shows all the datasets that are available for access\n", "# This information can be filtered to find a dataset of interest\n", "datasets = opd.datasets.query()\n", "datasets.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "These states have datasets: ['Arizona' 'Arkansas' 'California' 'Colorado' 'Connecticut' 'Delaware'\n", " 'District of Columbia' 'Florida' 'Georgia' 'Idaho' 'Illinois' 'Indiana'\n", " 'Iowa' 'Kansas' 'Kentucky' 'Louisiana' 'Maryland' 'Massachusetts'\n", " 'Michigan' 'Minnesota' 'Mississippi' 'Missouri' 'Montana' 'Nebraska'\n", " 'Nevada' 'New Hampshire' 'New Jersey' 'New York' 'North Carolina'\n", " 'North Dakota' 'Ohio' 'Oklahoma' 'Oregon' 'Pennsylvania' 'Rhode Island'\n", " 'South Carolina' 'South Dakota' 'Tennessee' 'Texas' 'Vermont' 'Virginia'\n", " 'Washington' 'Wisconsin' 'Wyoming']\n" ] } ], "source": [ "# Find out which states data is available for\n", "print(f\"These states have datasets: {datasets['State'].unique()}\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateSourceNameAgencyAgencyFullTableTypecoverage_startcoverage_endlast_coverage_checkDescriptionsource_urlreadmeURLYearDataTypedate_fielddataset_idagency_fieldmin_versionquery
470MarylandBaltimoreBaltimoreBaltimore Police DepartmentARRESTS2010-01-012024-05-0405/10/2024This dataset represents arrests made by the Ba...https://data.baltimorecity.gov/datasets/baltim...<NA>https://egis.baltimorecity.gov/egis/rest/servi...MULTIPLEArcGISArrestDateTime<NA><NA>0.2NaN
471MarylandBaltimoreBaltimoreBaltimore Police DepartmentCALLS FOR SERVICE2017-01-012017-12-3107/06/2023Police Emergency and Non-Emergency calls to 911https://data.baltimorecity.gov/datasets/baltim...<NA>https://services1.arcgis.com/UWYHeuuJISiGmgXx/...2017ArcGIS<NA><NA><NA><NA>NaN
472MarylandBaltimoreBaltimoreBaltimore Police DepartmentCALLS FOR SERVICE2018-01-012018-12-3107/06/2023Police Emergency and Non-Emergency calls to 912https://data.baltimorecity.gov/datasets/baltim...<NA>https://services1.arcgis.com/UWYHeuuJISiGmgXx/...2018ArcGIS<NA><NA><NA><NA>NaN
473MarylandBaltimoreBaltimoreBaltimore Police DepartmentCALLS FOR SERVICE2019-01-012019-12-3107/06/2023Police Emergency and Non-Emergency calls to 913https://data.baltimorecity.gov/datasets/baltim...<NA>https://services1.arcgis.com/UWYHeuuJISiGmgXx/...2019ArcGIS<NA><NA><NA><NA>NaN
474MarylandBaltimoreBaltimoreBaltimore Police DepartmentCALLS FOR SERVICE2020-01-012020-12-3107/06/2023Police Emergency and Non-Emergency calls to 914https://data.baltimorecity.gov/datasets/baltim...<NA>https://services1.arcgis.com/UWYHeuuJISiGmgXx/...2020ArcGIS<NA><NA><NA><NA>NaN
\n", "
" ], "text/plain": [ " State SourceName Agency AgencyFull \\\n", "470 Maryland Baltimore Baltimore Baltimore Police Department \n", "471 Maryland Baltimore Baltimore Baltimore Police Department \n", "472 Maryland Baltimore Baltimore Baltimore Police Department \n", "473 Maryland Baltimore Baltimore Baltimore Police Department \n", "474 Maryland Baltimore Baltimore Baltimore Police Department \n", "\n", " TableType coverage_start coverage_end last_coverage_check \\\n", "470 ARRESTS 2010-01-01 2024-05-04 05/10/2024 \n", "471 CALLS FOR SERVICE 2017-01-01 2017-12-31 07/06/2023 \n", "472 CALLS FOR SERVICE 2018-01-01 2018-12-31 07/06/2023 \n", "473 CALLS FOR SERVICE 2019-01-01 2019-12-31 07/06/2023 \n", "474 CALLS FOR SERVICE 2020-01-01 2020-12-31 07/06/2023 \n", "\n", " Description \\\n", "470 This dataset represents arrests made by the Ba... \n", "471 Police Emergency and Non-Emergency calls to 911 \n", "472 Police Emergency and Non-Emergency calls to 912 \n", "473 Police Emergency and Non-Emergency calls to 913 \n", "474 Police Emergency and Non-Emergency calls to 914 \n", "\n", " source_url readme \\\n", "470 https://data.baltimorecity.gov/datasets/baltim... \n", "471 https://data.baltimorecity.gov/datasets/baltim... \n", "472 https://data.baltimorecity.gov/datasets/baltim... \n", "473 https://data.baltimorecity.gov/datasets/baltim... \n", "474 https://data.baltimorecity.gov/datasets/baltim... \n", "\n", " URL Year DataType \\\n", "470 https://egis.baltimorecity.gov/egis/rest/servi... MULTIPLE ArcGIS \n", "471 https://services1.arcgis.com/UWYHeuuJISiGmgXx/... 2017 ArcGIS \n", "472 https://services1.arcgis.com/UWYHeuuJISiGmgXx/... 2018 ArcGIS \n", "473 https://services1.arcgis.com/UWYHeuuJISiGmgXx/... 2019 ArcGIS \n", "474 https://services1.arcgis.com/UWYHeuuJISiGmgXx/... 
2020 ArcGIS \n", "\n", " date_field dataset_id agency_field min_version query \n", "470 ArrestDateTime 0.2 NaN \n", "471 NaN \n", "472 NaN \n", "473 NaN \n", "474 NaN " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# To see all available datasets for a state, use the following filter.\n", "df = opd.datasets.query(state=\"Maryland\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Maryland has the following tables available: ['ARRESTS' 'CALLS FOR SERVICE' 'STOPS' 'TRAFFIC STOPS' 'COMPLAINTS'\n", " 'CRASHES - INCIDENTS' 'CRASHES - NONMOTORIST' 'CRASHES - SUBJECTS'\n", " 'INCIDENTS']\n" ] } ], "source": [ "# Next, narrow the list of datasets to a particular type of data within a particular state.\n", "# First, look at which table types are available for the state.\n", "df = opd.datasets.query(state=\"Maryland\")\n", "print(f\"{df.iloc[0]['State']} has the following tables available: {df['TableType'].unique()}\")\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateSourceNameAgencyAgencyFullTableTypecoverage_startcoverage_endlast_coverage_checkDescriptionsource_urlreadmeURLYearDataTypedate_fielddataset_idagency_fieldmin_versionquery
479MarylandMarylandMULTIPLENaNTRAFFIC STOPS2007-01-012014-03-3101/10/2024Standardized stop data from the Stanford Open ...https://openpolicing.stanford.edu/data/https://github.com/stanford-policylab/opp/blob...https://stacks.stanford.edu/file/druid:yg821jf...MULTIPLECSVdate<NA>department_name<NA>NaN
485MarylandMontgomery CountyMontgomery CountyMontgomery County Police DepartmentTRAFFIC STOPS2012-06-072024-05-0905/10/2024This dataset contains traffic violation inform...https://data.montgomerycountymd.gov/Public-Saf...<NA>data.montgomerycountymd.govMULTIPLESocratadate_of_stop4mse-ku6q<NA><NA>NaN
\n", "
" ], "text/plain": [ " State SourceName Agency \\\n", "479 Maryland Maryland MULTIPLE \n", "485 Maryland Montgomery County Montgomery County \n", "\n", " AgencyFull TableType coverage_start \\\n", "479 NaN TRAFFIC STOPS 2007-01-01 \n", "485 Montgomery County Police Department TRAFFIC STOPS 2012-06-07 \n", "\n", " coverage_end last_coverage_check \\\n", "479 2014-03-31 01/10/2024 \n", "485 2024-05-09 05/10/2024 \n", "\n", " Description \\\n", "479 Standardized stop data from the Stanford Open ... \n", "485 This dataset contains traffic violation inform... \n", "\n", " source_url \\\n", "479 https://openpolicing.stanford.edu/data/ \n", "485 https://data.montgomerycountymd.gov/Public-Saf... \n", "\n", " readme \\\n", "479 https://github.com/stanford-policylab/opp/blob... \n", "485 \n", "\n", " URL Year DataType \\\n", "479 https://stacks.stanford.edu/file/druid:yg821jf... MULTIPLE CSV \n", "485 data.montgomerycountymd.gov MULTIPLE Socrata \n", "\n", " date_field dataset_id agency_field min_version query \n", "479 date department_name NaN \n", "485 date_of_stop 4mse-ku6q NaN " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# For example, to find all traffic stops datasets in Maryland, set table_type to one of the names printed in the previous cell.\n", "df = opd.datasets.query(table_type='TRAFFIC STOPS', state=\"Maryland\")\n", "df.head()\n", "# To learn how to load the data, open the notebook: loading_datasets.ipynb" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.12 ('opd')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "a73158d29711b2da05ac73de25b71e5d8cae591f14917bba77a9573b5c85a0ce" } } }, "nbformat": 4,
"nbformat_minor": 2 }