{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Finding Datasets\n", "This notebook shows how to find datasets for a state\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "try:\n", " import openpolicedata as opd #This import should be last in the try block because the except block will only try to load it\n", "except:\n", " import sys\n", " sys.path.append('../openpolicedata')\n", " import openpolicedata as opd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateSourceNameAgencyTableTypeYearDescriptionDataTypeURLdate_fielddataset_idagency_fieldmin_versionreadme
0VirginiaVirginiaMULTISTOPSMULTIA data collection consisting of all traffic an...Socratadata.virginia.govincident_date2c96-texwagency_name<NA>https://data.virginia.gov/api/views/2c96-texw/...
1VirginiaFairfax CountyFairfax CountyTRAFFIC WARNINGS2019Traffic Warnings issued by Fairfax County PoliceArcGIShttps://services9.arcgis.com/kYvfX7YK8OobHItA/...actdate<NA><NA><NA><NA>
2VirginiaFairfax CountyFairfax CountyTRAFFIC WARNINGS2020Traffic Warnings issued by Fairfax County PoliceArcGIShttps://services9.arcgis.com/kYvfX7YK8OobHItA/...actdate<NA><NA><NA><NA>
3VirginiaFairfax CountyFairfax CountyTRAFFIC CITATIONS2019Traffic Citations issued by Fairfax County PoliceArcGIShttps://services9.arcgis.com/kYvfX7YK8OobHItA/...tc_date<NA><NA><NA><NA>
4VirginiaFairfax CountyFairfax CountyTRAFFIC CITATIONS2020Traffic Citations issued by Fairfax County PoliceArcGIShttps://services9.arcgis.com/kYvfX7YK8OobHItA/...tc_date<NA><NA><NA><NA>
\n", "
" ], "text/plain": [ " State SourceName Agency TableType Year \\\n", "0 Virginia Virginia MULTI STOPS MULTI \n", "1 Virginia Fairfax County Fairfax County TRAFFIC WARNINGS 2019 \n", "2 Virginia Fairfax County Fairfax County TRAFFIC WARNINGS 2020 \n", "3 Virginia Fairfax County Fairfax County TRAFFIC CITATIONS 2019 \n", "4 Virginia Fairfax County Fairfax County TRAFFIC CITATIONS 2020 \n", "\n", " Description DataType \\\n", "0 A data collection consisting of all traffic an... Socrata \n", "1 Traffic Warnings issued by Fairfax County Police ArcGIS \n", "2 Traffic Warnings issued by Fairfax County Police ArcGIS \n", "3 Traffic Citations issued by Fairfax County Police ArcGIS \n", "4 Traffic Citations issued by Fairfax County Police ArcGIS \n", "\n", " URL date_field \\\n", "0 data.virginia.gov incident_date \n", "1 https://services9.arcgis.com/kYvfX7YK8OobHItA/... actdate \n", "2 https://services9.arcgis.com/kYvfX7YK8OobHItA/... actdate \n", "3 https://services9.arcgis.com/kYvfX7YK8OobHItA/... tc_date \n", "4 https://services9.arcgis.com/kYvfX7YK8OobHItA/... tc_date \n", "\n", " dataset_id agency_field min_version \\\n", "0 2c96-texw agency_name \n", "1 \n", "2 \n", "3 \n", "4 \n", "\n", " readme \n", "0 https://data.virginia.gov/api/views/2c96-texw/... 
\n", "1 \n", "2 \n", "3 \n", "4 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Query for the entire table of available data as a pandas DataFrame (https://pandas.pydata.org/docs/user_guide/10min.html#min)\n", "# This shows all the datasets that are available for access\n", "# This information can be filtered to find a dataset of interest\n", "datasets = opd.datasets.query()\n", "datasets.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "These states have datasets: ['Virginia' 'Maryland' 'Colorado' 'North Carolina' 'California' 'Arkansas'\n", " 'Arizona' 'Connecticut' 'Florida' 'Georgia' 'Iowa' 'Idaho' 'Illinois'\n", " 'Indiana' 'Kansas' 'Kentucky' 'Massachusetts' 'Michigan' 'Minnesota'\n", " 'Missouri' 'Mississippi' 'Montana' 'North Dakota' 'Nebraska'\n", " 'New Hampshire' 'Nevada' 'New York' 'Ohio' 'Oklahoma' 'Oregon'\n", " 'Pennsylvania' 'Rhode Island' 'South Carolina' 'South Dakota' 'Tennessee'\n", " 'Texas' 'Vermont' 'Washington' 'Wisconsin' 'Wyoming' 'New Jersey'\n", " 'Louisiana']\n" ] } ], "source": [ "# Find out which states data is available for\n", "print(f\"These states have datasets: {datasets['State'].unique()}\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateSourceNameAgencyTableTypeYearDescriptionDataTypeURLdate_fielddataset_idagency_fieldmin_versionreadme
5MarylandMontgomery CountyMontgomery CountyTRAFFIC STOPSMULTIThis dataset contains traffic violation inform...Socratadata.montgomerycountymd.govdate_of_stop4mse-ku6q<NA><NA>https://data.montgomerycountymd.gov/Public-Saf...
6MarylandMontgomery CountyMontgomery CountyCOMPLAINTSMULTIThis dataset contains allegations brought to t...Socratadata.montgomerycountymd.govcreated_dtusip-62e2<NA><NA>https://data.montgomerycountymd.gov/Public-Saf...
55MarylandBaltimoreBaltimoreSTOPSMULTIStandardized stop data from the Stanford Open ...CSVhttps://stacks.stanford.edu/file/druid:yg821jf...date<NA><NA><NA>https://github.com/stanford-policylab/opp/blob...
56MarylandMarylandMULTITRAFFIC STOPSMULTIStandardized stop data from the Stanford Open ...CSVhttps://stacks.stanford.edu/file/druid:yg821jf...date<NA>department_name<NA>https://github.com/stanford-policylab/opp/blob...
367MarylandBaltimoreBaltimoreCALLS FOR SERVICE2017Police Emergency and Non-Emergency calls to 911ArcGIShttps://opendata.baltimorecity.gov/egis/rest/s...<NA><NA><NA><NA><NA>
\n", "
" ], "text/plain": [ " State SourceName Agency TableType Year \\\n", "5 Maryland Montgomery County Montgomery County TRAFFIC STOPS MULTI \n", "6 Maryland Montgomery County Montgomery County COMPLAINTS MULTI \n", "55 Maryland Baltimore Baltimore STOPS MULTI \n", "56 Maryland Maryland MULTI TRAFFIC STOPS MULTI \n", "367 Maryland Baltimore Baltimore CALLS FOR SERVICE 2017 \n", "\n", " Description DataType \\\n", "5 This dataset contains traffic violation inform... Socrata \n", "6 This dataset contains allegations brought to t... Socrata \n", "55 Standardized stop data from the Stanford Open ... CSV \n", "56 Standardized stop data from the Stanford Open ... CSV \n", "367 Police Emergency and Non-Emergency calls to 911 ArcGIS \n", "\n", " URL date_field \\\n", "5 data.montgomerycountymd.gov date_of_stop \n", "6 data.montgomerycountymd.gov created_dt \n", "55 https://stacks.stanford.edu/file/druid:yg821jf... date \n", "56 https://stacks.stanford.edu/file/druid:yg821jf... date \n", "367 https://opendata.baltimorecity.gov/egis/rest/s... \n", "\n", " dataset_id agency_field min_version \\\n", "5 4mse-ku6q \n", "6 usip-62e2 \n", "55 \n", "56 department_name \n", "367 \n", "\n", " readme \n", "5 https://data.montgomerycountymd.gov/Public-Saf... \n", "6 https://data.montgomerycountymd.gov/Public-Saf... \n", "55 https://github.com/stanford-policylab/opp/blob... \n", "56 https://github.com/stanford-policylab/opp/blob... 
\n", "367 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# To see all available datasets for a state, use the following filter.\n", "df = opd.datasets.query(state=\"Maryland\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Maryland has the following tables available: ['TRAFFIC STOPS' 'COMPLAINTS' 'STOPS' 'CALLS FOR SERVICE' 'ARRESTS']\n" ] } ], "source": [ "# Now narrow the search further by looking for a particular type of data within a state\n", "# First, look at the table types available for the state\n", "df = opd.datasets.query(state=\"Maryland\")\n", "print(f\"{df.iloc[0]['State']} has the following tables available: {df['TableType'].unique()}\")\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StateSourceNameAgencyTableTypeYearDescriptionDataTypeURLdate_fielddataset_idagency_fieldmin_versionreadme
5MarylandMontgomery CountyMontgomery CountyTRAFFIC STOPSMULTIThis dataset contains traffic violation inform...Socratadata.montgomerycountymd.govdate_of_stop4mse-ku6q<NA><NA>https://data.montgomerycountymd.gov/Public-Saf...
56MarylandMarylandMULTITRAFFIC STOPSMULTIStandardized stop data from the Stanford Open ...CSVhttps://stacks.stanford.edu/file/druid:yg821jf...date<NA>department_name<NA>https://github.com/stanford-policylab/opp/blob...
\n", "
" ], "text/plain": [ " State SourceName Agency TableType Year \\\n", "5 Maryland Montgomery County Montgomery County TRAFFIC STOPS MULTI \n", "56 Maryland Maryland MULTI TRAFFIC STOPS MULTI \n", "\n", " Description DataType \\\n", "5 This dataset contains traffic violation inform... Socrata \n", "56 Standardized stop data from the Stanford Open ... CSV \n", "\n", " URL date_field \\\n", "5 data.montgomerycountymd.gov date_of_stop \n", "56 https://stacks.stanford.edu/file/druid:yg821jf... date \n", "\n", " dataset_id agency_field min_version \\\n", "5 4mse-ku6q \n", "56 department_name \n", "\n", " readme \n", "5 https://data.montgomerycountymd.gov/Public-Saf... \n", "56 https://github.com/stanford-policylab/opp/blob... " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# For example, to query for all traffic stops datasets in Maryland, set table_type using one of the names from the previous cell.\n", "df = opd.datasets.query(table_type='TRAFFIC STOPS', state=\"Maryland\")\n", "df.head()\n", "# To learn how to load the data, open the notebook: loading_datasets.ipynb" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.12 ('opd')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "a73158d29711b2da05ac73de25b71e5d8cae591f14917bba77a9573b5c85a0ce" } } }, "nbformat": 4, "nbformat_minor": 2 }