library(sfheaders)
# library(sf) ## for nice printing (disabled on GitHub Actions!)

R

The hierarchy of sf objects goes

  1. sfg - simple feature geometry
  2. sfc - simple feature collection (a collection of sfg)
  3. sf - simple feature object (sfc with data attributes)

sfg

The sfg_ group of functions assumes the input is a single geometry.


# A plain vector is read as the coordinates of one point; its length
# (2, 3 or 4) sets the dimension reported in the class (XY, XYZ, XYZM).
sfg_point(obj = 1:2)
#       [,1] [,2]
#  [1,]    1    2
#  attr(,"class")
#  [1] "XY"    "POINT" "sfg"
sfg_point(obj = 1:3)
#       [,1] [,2] [,3]
#  [1,]    1    2    3
#  attr(,"class")
#  [1] "XYZ"   "POINT" "sfg"
sfg_point(obj = 1:4)
#       [,1] [,2] [,3] [,4]
#  [1,]    1    2    3    4
#  attr(,"class")
#  [1] "XYZM"  "POINT" "sfg"

# The same vector given to sfg_linestring() becomes a one-row XYZM linestring.
sfg_linestring(obj = 1:4)
#       [,1] [,2] [,3] [,4]
#  [1,]    1    2    3    4
#  attr(,"class")
#  [1] "XYZM"       "LINESTRING" "sfg"

# Five vertices tracing a square; the last row repeats the first.
df <- data.frame(
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)

# Each row of the data.frame becomes one vertex of the linestring.
sfg_linestring(obj = df)
#       [,1] [,2]
#  [1,]    1    1
#  [2,]    1    4
#  [3,]    4    4
#  [4,]    4    1
#  [5,]    1    1
#  attr(,"class")
#  [1] "XY"         "LINESTRING" "sfg"

# As a polygon, the same coordinate matrix is wrapped in a list.
sfg_polygon(obj = df)
#  [[1]]
#       [,1] [,2]
#  [1,]    1    1
#  [2,]    1    4
#  [3,]    4    4
#  [4,]    4    1
#  [5,]    1    1
#  
#  attr(,"class")
#  [1] "XY"      "POLYGON" "sfg"

sfc

The sfc_ group of functions lets you specify an ‘id’ value to identify individual geometries (except for sfc_point(), where every row is an individual geometry).


# Same square as before, plus an id column grouping the rows.
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)

# No geometry columns are named, so all three columns (id, x, y) are used
# as coordinates and every row becomes one XYZ point.
sfc_point(obj = df)
#  [[1]]
#  [1] 1 1 1
#  attr(,"class")
#  [1] "XYZ"   "POINT" "sfg"  
#  
#  [[2]]
#  [1] 1 1 4
#  attr(,"class")
#  [1] "XYZ"   "POINT" "sfg"  
#  
#  [[3]]
#  [1] 2 4 4
#  attr(,"class")
#  [1] "XYZ"   "POINT" "sfg"  
#  
#  [[4]]
#  [1] 2 4 1
#  attr(,"class")
#  [1] "XYZ"   "POINT" "sfg"  
#  
#  [[5]]
#  [1] 3 1 1
#  attr(,"class")
#  [1] "XYZ"   "POINT" "sfg"  
#  
#  attr(,"n_empty")
#  [1] 0
#  attr(,"crs")
#  $input
#  [1] NA
#  
#  $wkt
#  [1] NA
#  
#  attr(,"class")
#  [1] "crs"
#  attr(,"class")
#  [1] "sfc_POINT" "sfc"      
#  attr(,"precision")
#  [1] 0
#  attr(,"bbox")
#  xmin ymin xmax ymax 
#     1    1    3    4 
#  attr(,"class")
#  [1] "bbox"
#  attr(,"z_range")
#  zmin zmax 
#     1    4 
#  attr(,"class")
#  [1] "z_range"

# Rows sharing an id are collected into one XY MULTIPOINT per id.
sfc_multipoint(obj = df, multipoint_id = "id")
#  [[1]]
#       [,1] [,2]
#  [1,]    1    1
#  [2,]    1    4
#  attr(,"class")
#  [1] "XY"         "MULTIPOINT" "sfg"       
#  
#  [[2]]
#       [,1] [,2]
#  [1,]    4    4
#  [2,]    4    1
#  attr(,"class")
#  [1] "XY"         "MULTIPOINT" "sfg"       
#  
#  [[3]]
#       [,1] [,2]
#  [1,]    1    1
#  attr(,"class")
#  [1] "XY"         "MULTIPOINT" "sfg"       
#  
#  attr(,"n_empty")
#  [1] 0
#  attr(,"crs")
#  $input
#  [1] NA
#  
#  $wkt
#  [1] NA
#  
#  attr(,"class")
#  [1] "crs"
#  attr(,"class")
#  [1] "sfc_MULTIPOINT" "sfc"           
#  attr(,"precision")
#  [1] 0
#  attr(,"bbox")
#  xmin ymin xmax ymax 
#     1    1    4    4 
#  attr(,"class")
#  [1] "bbox"

# Rows sharing an id are collected into one XY LINESTRING per id.
sfc_linestring(obj = df, linestring_id = "id")
#  [[1]]
#       [,1] [,2]
#  [1,]    1    1
#  [2,]    1    4
#  attr(,"class")
#  [1] "XY"         "LINESTRING" "sfg"       
#  
#  [[2]]
#       [,1] [,2]
#  [1,]    4    4
#  [2,]    4    1
#  attr(,"class")
#  [1] "XY"         "LINESTRING" "sfg"       
#  
#  [[3]]
#       [,1] [,2]
#  [1,]    1    1
#  attr(,"class")
#  [1] "XY"         "LINESTRING" "sfg"       
#  
#  attr(,"n_empty")
#  [1] 0
#  attr(,"crs")
#  $input
#  [1] NA
#  
#  $wkt
#  [1] NA
#  
#  attr(,"class")
#  [1] "crs"
#  attr(,"class")
#  [1] "sfc_LINESTRING" "sfc"           
#  attr(,"precision")
#  [1] 0
#  attr(,"bbox")
#  xmin ymin xmax ymax 
#     1    1    4    4 
#  attr(,"class")
#  [1] "bbox"

# No id argument is given: all rows go into a single polygon, and the id
# column is used as a coordinate too (the result is XYZ).
sfc_polygon(obj = df)
#  [[1]]
#  [[1]]
#       [,1] [,2] [,3]
#  [1,]    1    1    1
#  [2,]    1    1    4
#  [3,]    2    4    4
#  [4,]    2    4    1
#  [5,]    3    1    1
#  [6,]    1    1    1
#  
#  attr(,"class")
#  [1] "XYZ"     "POLYGON" "sfg"    
#  
#  attr(,"n_empty")
#  [1] 0
#  attr(,"crs")
#  $input
#  [1] NA
#  
#  $wkt
#  [1] NA
#  
#  attr(,"class")
#  [1] "crs"
#  attr(,"class")
#  [1] "sfc_POLYGON" "sfc"        
#  attr(,"precision")
#  [1] 0
#  attr(,"bbox")
#  xmin ymin xmax ymax 
#     1    1    3    4 
#  attr(,"class")
#  [1] "bbox"
#  attr(,"z_range")
#  zmin zmax 
#     1    4 
#  attr(,"class")
#  [1] "z_range"

sf

The sf_ functions also let you specify an ‘id’ value, but in this case the id is retained and kept on the object.


# Same example data: an id column plus x/y coordinates.
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)

# With no columns named, id, x and y are all used as point coordinates.
sf_point(obj = df)
#    geometry
#  1  1, 1, 1
#  2  1, 1, 4
#  3  2, 4, 4
#  4  2, 4, 1
#  5  3, 1, 1

# The id is kept as a column on the result, one row per multipoint.
sf_multipoint(obj = df, multipoint_id = "id")
#    id   geometry
#  1  1 1, 1, 1, 4
#  2  2 4, 4, 4, 1
#  3  3       1, 1

# Likewise for linestrings: one row per id.
sf_linestring(obj = df, linestring_id = "id")
#    id   geometry
#  1  1 1, 1, 1, 4
#  2  2 4, 4, 4, 1
#  3  3       1, 1

# No id argument is given: every row ends up in one polygon.
sf_polygon(obj = df)
#    id                                             geometry
#  1  1 1, 1, 2, 2, 3, 1, 1, 1, 4, 4, 1, 1, 1, 4, 4, 1, 1, 1

In all these examples I haven’t needed to specify the geometry columns, because other than the id field, all the other columns are used for the coordinates.

If your data.frame has other, non-geometry and non-id columns, you must specify at least the x and y parameters.


df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)

# Add an attribute column that is neither an id nor a coordinate.
df[["val"]] <- letters[df[["id"]]]

# Naming x and y restricts the coordinates to those two columns.
sf_point(obj = df, x = "x", y = "y")
#    geometry
#  1     1, 1
#  2     1, 4
#  3     4, 4
#  4     4, 1
#  5     1, 1

# The val column is not carried over to the result.
sf_linestring(obj = df, x = "x", y = "y", linestring_id = "id")
#    id   geometry
#  1  1 1, 1, 1, 4
#  2  2 4, 4, 4, 1
#  3  3       1, 1

If you want to keep all the other columns, set keep = TRUE

# keep = TRUE carries the extra val column through to the result.
sf_linestring(
  obj = df,
  x = "x",
  y = "y",
  linestring_id = "id",
  keep = TRUE
)
#    id val   geometry
#  1  1   a 1, 1, 1, 4
#  2  2   b 4, 4, 4, 1
#  3  3   c       1, 1

Converting to Data Frames

As of version 1.0 you can now convert from sfg, sfc and sf objects to data.frames


df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)

df[["val"]] <- letters[df[["id"]]]

# Build the sf object that is converted back below.
sf <- sf_linestring(
  obj = df,
  x = "x",
  y = "y",
  linestring_id = "id",
  keep = TRUE
)

# One row per coordinate, with sfg_id / linestring_id identifying the geometry.
sf_to_df(sf)
#    sfg_id linestring_id x y
#  1      1             1 1 1
#  2      1             1 1 4
#  3      2             2 4 4
#  4      2             2 4 1
#  5      3             3 1 1

And if you want to keep all the other columns and fill them down each row of the data.frame, set fill = TRUE


# fill = TRUE repeats the id and val attributes on every coordinate row.
sf_to_df(sf, fill = TRUE)
#    id val sfg_id linestring_id x y
#  1  1   a      1             1 1 1
#  2  1   a      1             1 1 4
#  3  2   b      2             2 4 4
#  4  2   b      2             2 4 1
#  5  3   c      3             3 1 1

Performance

Here’s a quick benchmark showing how well this library performs

# Benchmark: build linestrings of five random points each (n coordinates in
# total), once with data.table + sf and once with sfheaders.
n <- 1e5
df <- data.frame(
  id = rep(seq_len(n / 5), each = 5),
  x = rnorm(n),
  y = rnorm(n)
)

library(data.table)
library(microbenchmark)

dt <- as.data.table(df)
microbenchmark(

  # data.table + sf: one sf::st_linestring() call per id group.
  dt = {
    sf <- dt[
      ,
      {
        # c(x, y) holds every x followed by every y, so the matrix has to be
        # filled column-wise (the default) to get one (x, y) pair per row.
        # Filling with byrow = TRUE would pair x with x and y with y.
        coords <- matrix(c(x, y), ncol = 2)
        sf::st_sf(geometry = sf::st_sfc(sf::st_linestring(x = coords)))
      },
      by = id
    ]
    sf <- sf::st_as_sf(sf)
  },

  # sfheaders: a single call on the whole data.frame.
  sfheaders = {
    sfh <- sfheaders::sf_linestring(
      obj = df,
      linestring_id = "id"
    )
  },
  times = 5
)

# Unit: milliseconds
#      expr        min         lq       mean     median         uq        max neval
#        dt 6599.67479 6654.12357 6779.23543 6750.19807 6833.46262 7058.71809     5
# sfheaders   21.07775   21.30438   23.20592   23.21665   25.06429   25.36654     5