library(sfheaders)
library(sf) ## for nice printing

R

The hierarchy of sf objects goes

  1. sfg - simple feature geometry
  2. sfc - simple feature geometry list-column (a collection of sfg)
  3. sf - simple feature object (sfc with data attributes)

sfg

The sfg_ group of functions assumes the input is a single geometry.


# A bare vector is a single point; its length sets the dimension.
sfg_point(1:2)
#  POINT (1 2)
sfg_point(1:3)
#  POINT Z (1 2 3)
sfg_point(1:4)
#  POINT ZM (1 2 3 4)

# Passed to sfg_linestring(), the same vector is still one coordinate.
sfg_linestring(1:4)
#  LINESTRING ZM (1 2 3 4)

# With a data.frame, each row is one coordinate of the geometry.
df <- data.frame(
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)
sfg_linestring(df)
#  LINESTRING (1 1, 1 4, 4 4, 4 1, 1 1)

sfg_polygon(df)
#  POLYGON ((1 1, 1 4, 4 4, 4 1, 1 1))

sfc

The sfc_ group of functions lets you specify an ‘id’ value to identify individual geometries (except for sfc_point(), where every row is an individual geometry).


# 'id' groups rows into geometries; 'x' and 'y' hold the coordinates.
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)

# No columns are named here, so 'id' is read as a coordinate too (XYZ).
sfc_point(df)
#  Geometry set for 5 features 
#  geometry type:  POINT
#  dimension:      XYZ
#  bbox:           xmin: 1 ymin: 1 xmax: 3 ymax: 4
#  z_range:        zmin: 1 zmax: 4
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#  POINT Z (1 1 1)
#  POINT Z (1 1 4)
#  POINT Z (2 4 4)
#  POINT Z (2 4 1)
#  POINT Z (3 1 1)

# Rows sharing an 'id' are combined into one MULTIPOINT.
sfc_multipoint(df, multipoint_id = "id")
#  Geometry set for 3 features 
#  geometry type:  MULTIPOINT
#  dimension:      XY
#  bbox:           xmin: 1 ymin: 1 xmax: 4 ymax: 4
#  z_range:        zmin: NA zmax: NA
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#  MULTIPOINT (1 1, 1 4)
#  MULTIPOINT (4 4, 4 1)
#  MULTIPOINT (1 1)

# Rows sharing an 'id' are combined into one LINESTRING.
sfc_linestring(df, linestring_id = "id")
#  Geometry set for 3 features 
#  geometry type:  LINESTRING
#  dimension:      XY
#  bbox:           xmin: 1 ymin: 1 xmax: 4 ymax: 4
#  z_range:        zmin: NA zmax: NA
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#  LINESTRING (1 1, 1 4)
#  LINESTRING (4 4, 4 1)
#  LINESTRING (1 1)

## leaving the 'id' field blank: every row goes into a single polygon and
## all three columns are used as coordinates
sfc_polygon(df)
#  Geometry set for 1 feature 
#  geometry type:  POLYGON
#  dimension:      XYZ
#  bbox:           xmin: 1 ymin: 1 xmax: 3 ymax: 4
#  z_range:        zmin: 1 zmax: 4
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#  POLYGON Z ((1 1 1, 1 1 4, 2 4 4, 2 4 1, 3 1 1, ...

sf

The sf_ functions also let you specify an ‘id’ value, but in this case the id is retained and kept on the object


# 'id' groups rows into geometries; 'x' and 'y' hold the coordinates.
df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)

# No columns are named here, so 'id' is read as a coordinate too (XYZ).
sf_point(df)
#  Simple feature collection with 5 features and 0 fields
#  geometry type:  POINT
#  dimension:      XYZ
#  bbox:           xmin: 1 ymin: 1 xmax: 3 ymax: 4
#  z_range:        zmin: 1 zmax: 4
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#           geometry
#  1 POINT Z (1 1 1)
#  2 POINT Z (1 1 4)
#  3 POINT Z (2 4 4)
#  4 POINT Z (2 4 1)
#  5 POINT Z (3 1 1)

# The grouping column is kept as the 'id' field of the result.
sf_multipoint(df, multipoint_id = "id")
#  Simple feature collection with 3 features and 1 field
#  geometry type:  MULTIPOINT
#  dimension:      XY
#  bbox:           xmin: 1 ymin: 1 xmax: 4 ymax: 4
#  z_range:        zmin: NA zmax: NA
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#    id              geometry
#  1  1 MULTIPOINT (1 1, 1 4)
#  2  2 MULTIPOINT (4 4, 4 1)
#  3  3      MULTIPOINT (1 1)

sf_linestring(df, linestring_id = "id")
#  Simple feature collection with 3 features and 1 field
#  geometry type:  LINESTRING
#  dimension:      XY
#  bbox:           xmin: 1 ymin: 1 xmax: 4 ymax: 4
#  z_range:        zmin: NA zmax: NA
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#    id              geometry
#  1  1 LINESTRING (1 1, 1 4)
#  2  2 LINESTRING (4 4, 4 1)
#  3  3      LINESTRING (1 1)

## leaving the 'id' field blank: every row goes into a single polygon and
## all three columns are used as coordinates
sf_polygon(df)
#  Simple feature collection with 1 feature and 1 field
#  geometry type:  POLYGON
#  dimension:      XYZ
#  bbox:           xmin: 1 ymin: 1 xmax: 3 ymax: 4
#  z_range:        zmin: 1 zmax: 4
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#    id                       geometry
#  1  1 POLYGON Z ((1 1 1, 1 1 4, 2...

In all these examples I haven’t needed to specify the geometry columns, because other than the id field, all the other columns are used for the coordinates.

If your data.frame has other, non-geometry and non-id columns, you must specify at least the x and y parameters.


df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)

# Add a non-coordinate attribute column: "a", "b" or "c" depending on id.
df$val <- letters[df$id]

# Naming x and y restricts the coordinates to those two columns.
sf_point(df, x = "x", y = "y")
#  Simple feature collection with 5 features and 0 fields
#  geometry type:  POINT
#  dimension:      XY
#  bbox:           xmin: 1 ymin: 1 xmax: 4 ymax: 4
#  z_range:        zmin: NA zmax: NA
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#       geometry
#  1 POINT (1 1)
#  2 POINT (1 4)
#  3 POINT (4 4)
#  4 POINT (4 1)
#  5 POINT (1 1)

# Only 'id' is carried over to the result; 'val' is dropped.
sf_linestring(
  df,
  x = "x",
  y = "y",
  linestring_id = "id"
)
#  Simple feature collection with 3 features and 1 field
#  geometry type:  LINESTRING
#  dimension:      XY
#  bbox:           xmin: 1 ymin: 1 xmax: 4 ymax: 4
#  z_range:        zmin: NA zmax: NA
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#    id              geometry
#  1  1 LINESTRING (1 1, 1 4)
#  2  2 LINESTRING (4 4, 4 1)
#  3  3      LINESTRING (1 1)

If you want to keep all the other columns, set keep = TRUE

# keep = TRUE carries the remaining columns ('val') over to the result.
sf_linestring(
  df,
  x = "x",
  y = "y",
  linestring_id = "id",
  keep = TRUE
)
#  Simple feature collection with 3 features and 2 fields
#  geometry type:  LINESTRING
#  dimension:      XY
#  bbox:           xmin: 1 ymin: 1 xmax: 4 ymax: 4
#  z_range:        zmin: NA zmax: NA
#  m_range:        mmin: NA mmax: NA
#  epsg (SRID):    NA
#  proj4string:    NA
#    id val              geometry
#  1  1   a LINESTRING (1 1, 1 4)
#  2  2   b LINESTRING (4 4, 4 1)
#  3  3   c      LINESTRING (1 1)

Converting to Data Frames

As of version 1.0 you can now convert from sfg, sfc and sf objects to data.frames


df <- data.frame(
  id = c(1, 1, 2, 2, 3),
  x = c(1, 1, 4, 4, 1),
  y = c(1, 4, 4, 1, 1)
)

# Add a non-coordinate attribute column: "a", "b" or "c" depending on id.
df$val <- letters[df$id]

# Build the sf object that is converted back to a data.frame below.
sf <- sf_linestring(
  df,
  x = "x",
  y = "y",
  linestring_id = "id",
  keep = TRUE
)

# One row per coordinate, with the geometry ids alongside x and y.
sf_to_df(sf)
#    sfg_id linestring_id x y
#  1      1             1 1 1
#  2      1             1 1 4
#  3      2             2 4 4
#  4      2             2 4 1
#  5      3             3 1 1

And if you want to keep all the other columns and fill them down each row of the data.frame, set fill = TRUE


# fill = TRUE repeats each feature's 'id' and 'val' on every coordinate row.
sf_to_df(sf, fill = TRUE)
#    id val sfg_id linestring_id x y
#  1  1   a      1             1 1 1
#  2  1   a      1             1 1 4
#  3  2   b      2             2 4 4
#  4  2   b      2             2 4 1
#  5  3   c      3             3 1 1

Performance

Here’s a quick benchmark showing how well this library performs, compared with building the same linestrings per id using sf and data.table

# Benchmark input: n random coordinates, grouped by id into n / 5
# linestrings of five points each.
n <- 1e5
df <- data.frame(
  id = rep(1:(n/5), each = 5)
  , x = rnorm(n)
  , y = rnorm(n)
)

library(data.table)
library(microbenchmark)

dt <- as.data.table( df )
microbenchmark(

  # Baseline: build one sf linestring per id inside a data.table group-by.
  dt = {
    sf <- dt[
      , {
        # Fill column-wise so column 1 is x and column 2 is y. A row-wise
        # fill of c( x, y ) would pair x1 with x2, x3 with x4, ... and give
        # different geometries from the sfheaders branch.
        geometry <- sf::st_linestring( x = matrix( c( x, y ), ncol = 2 ) )
        geometry <- sf::st_sf( geometry = sf::st_sfc( geometry ) )
      }
      , by = id
    ]
    sf <- sf::st_as_sf( sf )
  },
  
  # sfheaders: build every linestring in a single call.
  sfheaders = {
    sfh <- sfheaders::sf_linestring(
      obj = df
      , linestring_id = "id"
    )
  },
  times = 5
)

# Unit: milliseconds
#      expr        min         lq       mean     median         uq        max neval
#        dt 6599.67479 6654.12357 6779.23543 6750.19807 6833.46262 7058.71809     5
# sfheaders   21.07775   21.30438   23.20592   23.21665   25.06429   25.36654     5