一种做法是使用正则表达式的环视(lookaround)来提取数值部分:
library(tidyverse)
# Extract both coordinates with regex lookarounds. str_extract() returns
# character vectors, so lat and lon are <chr> columns.
# tibble() is used because data_frame() is deprecated in the tibble package.
tibble(
  # lat: optionally signed run of digits/dots right after an opening "("
  lat = str_extract(lines, "(?<=\\()-?[0-9.]+"),
  # lon: optionally signed run of digits/dots right before a closing ")"
  lon = str_extract(lines, "-?[0-9.]+(?=\\))")
)
# A tibble: 5 x 2
# lat lon
# <chr> <chr>
#1 42.252352 -71.075213
#2 42.332339 -71.246592
#3 42.335954 -71.107661
#4 42.09707 -71.065645
#5 42.465496 -71.121408
或者使用 read.csv:先用 gsub 删除从行首到 "(" 为止(包括 "(" 本身)的字符以及行尾的 ")",这样 "," 就成为分隔符,read.csv 即可据此把数据分成两列:
# Strip everything up to and including the first "(" as well as a trailing
# ")", then let read.csv() split the remaining text on "," into two columns.
read.csv(
  text = gsub(pattern = "^[^(]+\\(|\\)$", replacement = "", x = lines),
  header = FALSE,
  col.names = c("lat", "lon")
)
# lat lon
#1 42.25235 -71.07521
#2 42.33234 -71.24659
#3 42.33595 -71.10766
#4 42.09707 -71.06565
#5 42.46550 -71.12141
数据
# Read the raw input: one character string per line of "file.txt".
lines <- readLines(con = "file.txt")