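The hierarchy of sf objects goes `sfg` (a single geometry) -> `sfc` (a
collection of geometries) -> `sf` (a data.frame with a geometry column).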
The `sfg_` group of functions assumes the input is a single geometry.
sfg_point( 1:2 )
# [,1] [,2]
# [1,] 1 2
# attr(,"class")
# [1] "XY" "POINT" "sfg"
sfg_point( 1:3 )
# [,1] [,2] [,3]
# [1,] 1 2 3
# attr(,"class")
# [1] "XYZ" "POINT" "sfg"
sfg_point( 1:4 )
# [,1] [,2] [,3] [,4]
# [1,] 1 2 3 4
# attr(,"class")
# [1] "XYZM" "POINT" "sfg"
sfg_linestring( 1:4 )
# [,1] [,2] [,3] [,4]
# [1,] 1 2 3 4
# attr(,"class")
# [1] "XYZM" "LINESTRING" "sfg"
# Coordinates of a closed square: the first point is repeated as the last
df <- data.frame(
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
sfg_linestring(df)
# [,1] [,2]
# [1,] 1 1
# [2,] 1 4
# [3,] 4 4
# [4,] 4 1
# [5,] 1 1
# attr(,"class")
# [1] "XY" "LINESTRING" "sfg"
sfg_polygon(df)
# [[1]]
# [,1] [,2]
# [1,] 1 1
# [2,] 1 4
# [3,] 4 4
# [4,] 4 1
# [5,] 1 1
#
# attr(,"class")
# [1] "XY" "POLYGON" "sfg"
The `sfc_` group of functions lets you specify an 'id' value to identify
individual geometries. The exception is `sfc_point()`, where every row is
an individual geometry.
# Five rows tagged with an id: two rows each for ids 1 and 2, one for id 3
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
# No columns are named here, so all three columns are used as coordinates
sfc_point(df)
# [[1]]
# [1] 1 1 1
# attr(,"class")
# [1] "XYZ" "POINT" "sfg"
#
# [[2]]
# [1] 1 1 4
# attr(,"class")
# [1] "XYZ" "POINT" "sfg"
#
# [[3]]
# [1] 2 4 4
# attr(,"class")
# [1] "XYZ" "POINT" "sfg"
#
# [[4]]
# [1] 2 4 1
# attr(,"class")
# [1] "XYZ" "POINT" "sfg"
#
# [[5]]
# [1] 3 1 1
# attr(,"class")
# [1] "XYZ" "POINT" "sfg"
#
# attr(,"n_empty")
# [1] 0
# attr(,"crs")
# $input
# [1] NA
#
# $wkt
# [1] NA
#
# attr(,"class")
# [1] "crs"
# attr(,"class")
# [1] "sfc_POINT" "sfc"
# attr(,"precision")
# [1] 0
# attr(,"bbox")
# xmin ymin xmax ymax
# 1 1 3 4
# attr(,"class")
# [1] "bbox"
# attr(,"z_range")
# zmin zmax
# 1 4
# attr(,"class")
# [1] "z_range"
sfc_multipoint( df, multipoint_id = "id" )
# [[1]]
# [,1] [,2]
# [1,] 1 1
# [2,] 1 4
# attr(,"class")
# [1] "XY" "MULTIPOINT" "sfg"
#
# [[2]]
# [,1] [,2]
# [1,] 4 4
# [2,] 4 1
# attr(,"class")
# [1] "XY" "MULTIPOINT" "sfg"
#
# [[3]]
# [,1] [,2]
# [1,] 1 1
# attr(,"class")
# [1] "XY" "MULTIPOINT" "sfg"
#
# attr(,"n_empty")
# [1] 0
# attr(,"crs")
# $input
# [1] NA
#
# $wkt
# [1] NA
#
# attr(,"class")
# [1] "crs"
# attr(,"class")
# [1] "sfc_MULTIPOINT" "sfc"
# attr(,"precision")
# [1] 0
# attr(,"bbox")
# xmin ymin xmax ymax
# 1 1 4 4
# attr(,"class")
# [1] "bbox"
sfc_linestring( df, linestring_id = "id" )
# [[1]]
# [,1] [,2]
# [1,] 1 1
# [2,] 1 4
# attr(,"class")
# [1] "XY" "LINESTRING" "sfg"
#
# [[2]]
# [,1] [,2]
# [1,] 4 4
# [2,] 4 1
# attr(,"class")
# [1] "XY" "LINESTRING" "sfg"
#
# [[3]]
# [,1] [,2]
# [1,] 1 1
# attr(,"class")
# [1] "XY" "LINESTRING" "sfg"
#
# attr(,"n_empty")
# [1] 0
# attr(,"crs")
# $input
# [1] NA
#
# $wkt
# [1] NA
#
# attr(,"class")
# [1] "crs"
# attr(,"class")
# [1] "sfc_LINESTRING" "sfc"
# attr(,"precision")
# [1] 0
# attr(,"bbox")
# xmin ymin xmax ymax
# 1 1 4 4
# attr(,"class")
# [1] "bbox"
## leaving the 'id' field blank
sfc_polygon( df )
# [[1]]
# [[1]]
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 1 1 4
# [3,] 2 4 4
# [4,] 2 4 1
# [5,] 3 1 1
# [6,] 1 1 1
#
# attr(,"class")
# [1] "XYZ" "POLYGON" "sfg"
#
# attr(,"n_empty")
# [1] 0
# attr(,"crs")
# $input
# [1] NA
#
# $wkt
# [1] NA
#
# attr(,"class")
# [1] "crs"
# attr(,"class")
# [1] "sfc_POLYGON" "sfc"
# attr(,"precision")
# [1] 0
# attr(,"bbox")
# xmin ymin xmax ymax
# 1 1 3 4
# attr(,"class")
# [1] "bbox"
# attr(,"z_range")
# zmin zmax
# 1 4
# attr(,"class")
# [1] "z_range"
The `sf_` functions also let you specify an 'id' value, but in this case
the id is kept as a column on the resulting object.
# Same example data as before: an id column plus x / y coordinates
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
# Every row becomes its own point; no columns are named, so all are used
sf_point(df)
# geometry
# 1 1, 1, 1
# 2 1, 1, 4
# 3 2, 4, 4
# 4 2, 4, 1
# 5 3, 1, 1
sf_multipoint( df, multipoint_id = "id" )
# id geometry
# 1 1 1, 1, 1, 4
# 2 2 4, 4, 4, 1
# 3 3 1, 1
sf_linestring( df, linestring_id = "id" )
# id geometry
# 1 1 1, 1, 1, 4
# 2 2 4, 4, 4, 1
# 3 3 1, 1
## leaving the 'id' field blank
sf_polygon( df )
# id geometry
# 1 1 1, 1, 2, 2, 3, 1, 1, 1, 4, 4, 1, 1, 1, 4, 4, 1, 1, 1
In all these examples I haven't needed to specify the geometry columns
because, other than the `id` field, all the other columns are used for the
coordinates.
If your data.frame has other, non-geometry and non-id columns, you must
specify at least the `x` and `y` parameters.
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
# Add a non-coordinate column: one letter per id
df[["val"]] <- letters[df[["id"]]]
# Name the coordinate columns explicitly so only x and y form the geometry
sf_point(df, x = "x", y = "y")
# geometry
# 1 1, 1
# 2 1, 4
# 3 4, 4
# 4 4, 1
# 5 1, 1
sf_linestring( df, x = "x", y = "y", linestring_id = "id" )
# id geometry
# 1 1 1, 1, 1, 4
# 2 2 4, 4, 4, 1
# 3 3 1, 1
If you want to keep all the other columns, set `keep = TRUE`.
sf_linestring( df, x = "x", y = "y", linestring_id = "id", keep = TRUE )
# id val geometry
# 1 1 a 1, 1, 1, 4
# 2 2 b 4, 4, 4, 1
# 3 3 c 1, 1
As of version 1.0 you can now convert from `sfg`, `sfc` and `sf` objects
to data.frames.
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
# Add a non-coordinate column: one letter per id
df[["val"]] <- letters[df[["id"]]]

# Build one linestring per id, carrying the extra columns along
sf <- sf_linestring(
  df,
  x = "x",
  y = "y",
  linestring_id = "id",
  keep = TRUE
)

# Convert the sf object back to a data.frame of coordinates
sf_to_df(sf)
# sfg_id linestring_id x y
# 1 1 1 1 1
# 2 1 1 1 4
# 3 2 2 4 4
# 4 2 2 4 1
# 5 3 3 1 1
And if you want to keep all the other columns and fill them down each
row of the data.frame, set `fill = TRUE`.
sf_to_df( sf, fill = TRUE )
# id val sfg_id linestring_id x y
# 1 1 a 1 1 1 1
# 2 1 a 1 1 1 4
# 3 2 b 2 2 4 4
# 4 2 b 2 2 4 1
# 5 3 c 3 3 1 1
Here's a quick benchmark comparing sfheaders with building the same
linestrings using data.table and sf:
# Benchmark: build one 5-point linestring per id from n random coordinates,
# once with data.table + sf and once with sfheaders.
n <- 1e5
df <- data.frame(
  id = rep(1:(n / 5), each = 5),
  x = rnorm(n),
  y = rnorm(n)
)

library(data.table)
library(microbenchmark)

dt <- as.data.table(df)

microbenchmark(
  dt = {
    sf <- dt[
      , {
        # c(x, y) is every x value followed by every y value, so the matrix
        # must be filled column-wise (the default) to give one (x, y) pair
        # per row. Filling by row would pair x with x and y with y.
        geometry <- sf::st_linestring(x = matrix(c(x, y), ncol = 2))
        geometry <- sf::st_sf(geometry = sf::st_sfc(geometry))
      }
      , by = id
    ]
    sf <- sf::st_as_sf(sf)
  },
  sfheaders = {
    sfh <- sfheaders::sf_linestring(
      obj = df,
      linestring_id = "id"
    )
  },
  times = 5
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# dt 6599.67479 6654.12357 6779.23543 6750.19807 6833.46262 7058.71809 5
# sfheaders 21.07775 21.30438 23.20592 23.21665 25.06429 25.36654 5