{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Finding Datasets\n",
"This notebook shows how to query the table of available datasets and filter it by state and table type.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Use an importable copy of openpolicedata if there is one; otherwise add\n",
"# ../openpolicedata to the module search path and retry the import.\n",
"try:\n",
"    # Keep this import last in the try block: the except block only retries this import.\n",
"    import openpolicedata as opd\n",
"except ImportError:\n",
"    # Catch only import failures so that unrelated errors are not silently hidden.\n",
"    import sys\n",
"    sys.path.append('../openpolicedata')\n",
"    import openpolicedata as opd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" State | \n",
" SourceName | \n",
" Agency | \n",
" TableType | \n",
" Year | \n",
" Description | \n",
" DataType | \n",
" URL | \n",
" date_field | \n",
" dataset_id | \n",
" agency_field | \n",
" min_version | \n",
" readme | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Virginia | \n",
" Virginia | \n",
" MULTI | \n",
" STOPS | \n",
" MULTI | \n",
" A data collection consisting of all traffic an... | \n",
" Socrata | \n",
" data.virginia.gov | \n",
" incident_date | \n",
" 2c96-texw | \n",
" agency_name | \n",
" <NA> | \n",
" https://data.virginia.gov/api/views/2c96-texw/... | \n",
"
\n",
" \n",
" | 1 | \n",
" Virginia | \n",
" Fairfax County | \n",
" Fairfax County | \n",
" TRAFFIC WARNINGS | \n",
" 2019 | \n",
" Traffic Warnings issued by Fairfax County Police | \n",
" ArcGIS | \n",
" https://services9.arcgis.com/kYvfX7YK8OobHItA/... | \n",
" actdate | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
"
\n",
" \n",
" | 2 | \n",
" Virginia | \n",
" Fairfax County | \n",
" Fairfax County | \n",
" TRAFFIC WARNINGS | \n",
" 2020 | \n",
" Traffic Warnings issued by Fairfax County Police | \n",
" ArcGIS | \n",
" https://services9.arcgis.com/kYvfX7YK8OobHItA/... | \n",
" actdate | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
"
\n",
" \n",
" | 3 | \n",
" Virginia | \n",
" Fairfax County | \n",
" Fairfax County | \n",
" TRAFFIC CITATIONS | \n",
" 2019 | \n",
" Traffic Citations issued by Fairfax County Police | \n",
" ArcGIS | \n",
" https://services9.arcgis.com/kYvfX7YK8OobHItA/... | \n",
" tc_date | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
"
\n",
" \n",
" | 4 | \n",
" Virginia | \n",
" Fairfax County | \n",
" Fairfax County | \n",
" TRAFFIC CITATIONS | \n",
" 2020 | \n",
" Traffic Citations issued by Fairfax County Police | \n",
" ArcGIS | \n",
" https://services9.arcgis.com/kYvfX7YK8OobHItA/... | \n",
" tc_date | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" State SourceName Agency TableType Year \\\n",
"0 Virginia Virginia MULTI STOPS MULTI \n",
"1 Virginia Fairfax County Fairfax County TRAFFIC WARNINGS 2019 \n",
"2 Virginia Fairfax County Fairfax County TRAFFIC WARNINGS 2020 \n",
"3 Virginia Fairfax County Fairfax County TRAFFIC CITATIONS 2019 \n",
"4 Virginia Fairfax County Fairfax County TRAFFIC CITATIONS 2020 \n",
"\n",
" Description DataType \\\n",
"0 A data collection consisting of all traffic an... Socrata \n",
"1 Traffic Warnings issued by Fairfax County Police ArcGIS \n",
"2 Traffic Warnings issued by Fairfax County Police ArcGIS \n",
"3 Traffic Citations issued by Fairfax County Police ArcGIS \n",
"4 Traffic Citations issued by Fairfax County Police ArcGIS \n",
"\n",
" URL date_field \\\n",
"0 data.virginia.gov incident_date \n",
"1 https://services9.arcgis.com/kYvfX7YK8OobHItA/... actdate \n",
"2 https://services9.arcgis.com/kYvfX7YK8OobHItA/... actdate \n",
"3 https://services9.arcgis.com/kYvfX7YK8OobHItA/... tc_date \n",
"4 https://services9.arcgis.com/kYvfX7YK8OobHItA/... tc_date \n",
"\n",
" dataset_id agency_field min_version \\\n",
"0 2c96-texw agency_name \n",
"1 \n",
"2 \n",
"3 \n",
"4 \n",
"\n",
" readme \n",
"0 https://data.virginia.gov/api/views/2c96-texw/... \n",
"1 \n",
"2 \n",
"3 \n",
"4 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Query for the entire table of available data as a pandas DataFrame (https://pandas.pydata.org/docs/user_guide/10min.html#min)\n",
"# This shows all the datasets that are available for access\n",
"# This information can be filtered to find a dataset of interest\n",
"datasets = opd.datasets.query()\n",
"datasets.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"These states have datasets: ['Virginia' 'Maryland' 'Colorado' 'North Carolina' 'California' 'Arkansas'\n",
" 'Arizona' 'Connecticut' 'Florida' 'Georgia' 'Iowa' 'Idaho' 'Illinois'\n",
" 'Indiana' 'Kansas' 'Kentucky' 'Massachusetts' 'Michigan' 'Minnesota'\n",
" 'Missouri' 'Mississippi' 'Montana' 'North Dakota' 'Nebraska'\n",
" 'New Hampshire' 'Nevada' 'New York' 'Ohio' 'Oklahoma' 'Oregon'\n",
" 'Pennsylvania' 'Rhode Island' 'South Carolina' 'South Dakota' 'Tennessee'\n",
" 'Texas' 'Vermont' 'Washington' 'Wisconsin' 'Wyoming' 'New Jersey'\n",
" 'Louisiana']\n"
]
}
],
"source": [
"# Find out which states data is available for\n",
"print(f\"These states have datasets: {datasets['State'].unique()}\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" State | \n",
" SourceName | \n",
" Agency | \n",
" TableType | \n",
" Year | \n",
" Description | \n",
" DataType | \n",
" URL | \n",
" date_field | \n",
" dataset_id | \n",
" agency_field | \n",
" min_version | \n",
" readme | \n",
"
\n",
" \n",
" \n",
" \n",
" | 5 | \n",
" Maryland | \n",
" Montgomery County | \n",
" Montgomery County | \n",
" TRAFFIC STOPS | \n",
" MULTI | \n",
" This dataset contains traffic violation inform... | \n",
" Socrata | \n",
" data.montgomerycountymd.gov | \n",
" date_of_stop | \n",
" 4mse-ku6q | \n",
" <NA> | \n",
" <NA> | \n",
" https://data.montgomerycountymd.gov/Public-Saf... | \n",
"
\n",
" \n",
" | 6 | \n",
" Maryland | \n",
" Montgomery County | \n",
" Montgomery County | \n",
" COMPLAINTS | \n",
" MULTI | \n",
" This dataset contains allegations brought to t... | \n",
" Socrata | \n",
" data.montgomerycountymd.gov | \n",
" created_dt | \n",
" usip-62e2 | \n",
" <NA> | \n",
" <NA> | \n",
" https://data.montgomerycountymd.gov/Public-Saf... | \n",
"
\n",
" \n",
" | 55 | \n",
" Maryland | \n",
" Baltimore | \n",
" Baltimore | \n",
" STOPS | \n",
" MULTI | \n",
" Standardized stop data from the Stanford Open ... | \n",
" CSV | \n",
" https://stacks.stanford.edu/file/druid:yg821jf... | \n",
" date | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
" https://github.com/stanford-policylab/opp/blob... | \n",
"
\n",
" \n",
" | 56 | \n",
" Maryland | \n",
" Maryland | \n",
" MULTI | \n",
" TRAFFIC STOPS | \n",
" MULTI | \n",
" Standardized stop data from the Stanford Open ... | \n",
" CSV | \n",
" https://stacks.stanford.edu/file/druid:yg821jf... | \n",
" date | \n",
" <NA> | \n",
" department_name | \n",
" <NA> | \n",
" https://github.com/stanford-policylab/opp/blob... | \n",
"
\n",
" \n",
" | 367 | \n",
" Maryland | \n",
" Baltimore | \n",
" Baltimore | \n",
" CALLS FOR SERVICE | \n",
" 2017 | \n",
" Police Emergency and Non-Emergency calls to 911 | \n",
" ArcGIS | \n",
" https://opendata.baltimorecity.gov/egis/rest/s... | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
" <NA> | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" State SourceName Agency TableType Year \\\n",
"5 Maryland Montgomery County Montgomery County TRAFFIC STOPS MULTI \n",
"6 Maryland Montgomery County Montgomery County COMPLAINTS MULTI \n",
"55 Maryland Baltimore Baltimore STOPS MULTI \n",
"56 Maryland Maryland MULTI TRAFFIC STOPS MULTI \n",
"367 Maryland Baltimore Baltimore CALLS FOR SERVICE 2017 \n",
"\n",
" Description DataType \\\n",
"5 This dataset contains traffic violation inform... Socrata \n",
"6 This dataset contains allegations brought to t... Socrata \n",
"55 Standardized stop data from the Stanford Open ... CSV \n",
"56 Standardized stop data from the Stanford Open ... CSV \n",
"367 Police Emergency and Non-Emergency calls to 911 ArcGIS \n",
"\n",
" URL date_field \\\n",
"5 data.montgomerycountymd.gov date_of_stop \n",
"6 data.montgomerycountymd.gov created_dt \n",
"55 https://stacks.stanford.edu/file/druid:yg821jf... date \n",
"56 https://stacks.stanford.edu/file/druid:yg821jf... date \n",
"367 https://opendata.baltimorecity.gov/egis/rest/s... \n",
"\n",
" dataset_id agency_field min_version \\\n",
"5 4mse-ku6q \n",
"6 usip-62e2 \n",
"55 \n",
"56 department_name \n",
"367 \n",
"\n",
" readme \n",
"5 https://data.montgomerycountymd.gov/Public-Saf... \n",
"6 https://data.montgomerycountymd.gov/Public-Saf... \n",
"55 https://github.com/stanford-policylab/opp/blob... \n",
"56 https://github.com/stanford-policylab/opp/blob... \n",
"367 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To see all available datasets for a state use the following filter.\n",
"df = opd.datasets.query(state=\"Maryland\")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Maryland has the following tables available: ['TRAFFIC STOPS' 'COMPLAINTS' 'STOPS' 'CALLS FOR SERVICE' 'ARRESTS']\n"
]
}
],
"source": [
"# Next, narrow the search to a particular type of data within a state.\n",
"# Start by listing the table types available for that state.\n",
"state = \"Maryland\"\n",
"df = opd.datasets.query(state=state)\n",
"# Print the requested state name rather than reading it from the first row,\n",
"# which would raise an IndexError if the query returned no rows.\n",
"print(f\"{state} has the following tables available: {df['TableType'].unique()}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" State | \n",
" SourceName | \n",
" Agency | \n",
" TableType | \n",
" Year | \n",
" Description | \n",
" DataType | \n",
" URL | \n",
" date_field | \n",
" dataset_id | \n",
" agency_field | \n",
" min_version | \n",
" readme | \n",
"
\n",
" \n",
" \n",
" \n",
" | 5 | \n",
" Maryland | \n",
" Montgomery County | \n",
" Montgomery County | \n",
" TRAFFIC STOPS | \n",
" MULTI | \n",
" This dataset contains traffic violation inform... | \n",
" Socrata | \n",
" data.montgomerycountymd.gov | \n",
" date_of_stop | \n",
" 4mse-ku6q | \n",
" <NA> | \n",
" <NA> | \n",
" https://data.montgomerycountymd.gov/Public-Saf... | \n",
"
\n",
" \n",
" | 56 | \n",
" Maryland | \n",
" Maryland | \n",
" MULTI | \n",
" TRAFFIC STOPS | \n",
" MULTI | \n",
" Standardized stop data from the Stanford Open ... | \n",
" CSV | \n",
" https://stacks.stanford.edu/file/druid:yg821jf... | \n",
" date | \n",
" <NA> | \n",
" department_name | \n",
" <NA> | \n",
" https://github.com/stanford-policylab/opp/blob... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" State SourceName Agency TableType Year \\\n",
"5 Maryland Montgomery County Montgomery County TRAFFIC STOPS MULTI \n",
"56 Maryland Maryland MULTI TRAFFIC STOPS MULTI \n",
"\n",
" Description DataType \\\n",
"5 This dataset contains traffic violation inform... Socrata \n",
"56 Standardized stop data from the Stanford Open ... CSV \n",
"\n",
" URL date_field \\\n",
"5 data.montgomerycountymd.gov date_of_stop \n",
"56 https://stacks.stanford.edu/file/druid:yg821jf... date \n",
"\n",
" dataset_id agency_field min_version \\\n",
"5 4mse-ku6q \n",
"56 department_name \n",
"\n",
" readme \n",
"5 https://data.montgomerycountymd.gov/Public-Saf... \n",
"56 https://github.com/stanford-policylab/opp/blob... "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# For example, to find all traffic stops datasets in Maryland, pass one of the table type names printed in the previous cell as table_type.\n",
"df = opd.datasets.query(table_type='TRAFFIC STOPS', state=\"Maryland\")\n",
"df.head()\n",
"# To learn how to load the data open the notebook: loading_datasets.ipynb"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.12 ('opd')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "a73158d29711b2da05ac73de25b71e5d8cae591f14917bba77a9573b5c85a0ce"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}