Code
# Load readxl to import the Excel workbook
library(readxl)
# Read the crime table once from "Sheet 1", skipping the first 5 rows
EurostatCrime2021 <- read_excel(
  "EurostatCrime2021.xlsx",
  sheet = "Sheet 1",
  skip = 5
)
# Report the dimensions (rows, columns) of the imported table
dim(EurostatCrime2021)
[1] 41 18
# Load dplyr for df manipulation
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
# Drop the Fraud and Money laundering columns
df <- EurostatCrime2021 |>
  select(-Fraud, -"Money laundering")
# Keep a Country-free copy; its column names identify the columns to convert
df_numeric <- df |>
  select(-Country)
# Coerce every non-Country column to numeric for the computations that follow
df[names(df_numeric)] <- sapply(df[names(df_numeric)], as.numeric)
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
# Create offences column (row sums); a row containing any NA gets an NA total
df$offences <- rowSums(select(df, -Country))
# Collect the countries whose record has at least one missing value.
# complete.cases() flags fully observed rows in a single vectorised pass, so
# no row-by-row loop or repeated append() is needed; as.list() keeps the
# result a list of single strings, which is what str() reports below.
countries <- as.list(df$Country[!complete.cases(df)])
str(countries)
List of 29
$ : chr "Belgium"
$ : chr "Bulgaria"
$ : chr "Denmark"
$ : chr "Germany"
$ : chr "Estonia"
$ : chr "France"
$ : chr "Italy"
$ : chr "Luxembourg"
$ : chr "Hungary"
$ : chr "Malta"
$ : chr "Netherlands"
$ : chr "Poland"
$ : chr "Portugal"
$ : chr "Slovenia"
$ : chr "Slovakia"
$ : chr "Sweden"
$ : chr "Iceland"
$ : chr "Liechtenstein"
$ : chr "Norway"
$ : chr "Switzerland"
$ : chr "England and Wales"
$ : chr "Scotland"
$ : chr "Northern Ireland (UK)"
$ : chr "Bosnia and Herzegovina"
$ : chr "Montenegro"
$ : chr "North Macedonia"
$ : chr "Serbia"
$ : chr "Türkiye"
$ : chr "Kosovo (under United Nations Security Council Resolution 1244/99)"
# Keep copy of original data for creativity task
df_original <- df
# Omit records with NA values
df <- na.omit(df)
# Dimensions (rows, columns) left after dropping incomplete records
dim(df)
[1] 12 13
# library(tidyverse)
# df[which.max(df$offences),"Country"] %>%
# pluck("Country", 1)
# Country with the highest overall offence total
df$Country[which.max(df$offences)]
[1] "Finland"
# Create copy of df for plot in next task
df_copy <- df
# Create df with country, offences and acts against computer systems columns
df <- select(df, Country, offences, "Acts against computer systems")
# Create proportion column: acts against computer systems as a percentage of
# all offences, rounded to 3 decimal places
df$proportion <- round(
  df[["Acts against computer systems"]] / df$offences * 100,
  3
)
# Sort by proportion in ascending order. Order on the numeric value itself:
# wrapping it in as.integer() truncates the decimals, so e.g. 4.537 and 4.035
# both become 4 and keep their input order instead of being ranked.
df <- df[order(df$proportion), ]
df <- select(df, -"Acts against computer systems")
# Print table
library(knitr)
#| label: tbl-LABEL
#| tbl-cap: CAPTION
knitr::kable(df)
| Country | offences | proportion |
|---|---|---|
| Ireland | 577.69 | 0.370 |
| Latvia | 172.10 | 0.552 |
| Greece | 162.93 | 2.222 |
| Albania | 176.96 | 4.035 |
| Finland | 785.50 | 4.537 |
| Spain | 249.70 | 6.320 |
| Romania | 109.92 | 7.114 |
| Croatia | 346.16 | 9.282 |
| Cyprus | 168.97 | 9.641 |
| Czechia | 138.13 | 12.626 |
| Lithuania | 196.16 | 12.857 |
| Austria | 733.71 | 23.023 |
# Load plot libraries
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.3.2
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Swap spaces for underscores so columns can be referenced without quoting
names(df_copy) <- gsub(" ", "_", names(df_copy), fixed = TRUE)
# Set the plot size
options(repr.plot.width = 5, repr.plot.height = 4)
# Scatter plot of Robbery against drug-related offences, coloured by Country,
# with a readable x-axis label
p <- ggplot(
  df_copy,
  aes(x = Unlawful_acts_involving_controlled_drugs_or_precursors, y = Robbery)
) +
  geom_point(aes(colour = Country)) +
  labs(x = "Unlawful acts involving controlled drugs or precursors")
ggplotly(p)
# Plot 1
# Swap spaces for underscores so columns can be referenced without quoting
names(df_original) <- gsub(" ", "_", names(df_original), fixed = TRUE)
# Correlation of the two homicide columns in the data kept from before the NA
# rows were omitted; only pairs with both values present are used
cor(
  x = df_original$Intentional_homicide,
  y = df_original$Attempted_intentional_homicide,
  use = "complete.obs"
)
[1] 0.01422267
# Scatter plot of attempted against completed intentional homicide,
# coloured by Country
p1 <- ggplot(
  df_original,
  aes(x = Intentional_homicide, y = Attempted_intentional_homicide)
) +
  geom_point(aes(colour = Country))
ggplotly(p1)
# Plot 2
# Scatter plot of attempted against completed intentional homicide: point size
# encodes the offences total, colour encodes Country, black-and-white theme
p2 <- ggplot(
  df_original,
  aes(x = Intentional_homicide, y = Attempted_intentional_homicide)
) +
  geom_point(aes(size = offences, colour = Country)) +
  theme_bw()
ggplotly(p2)