对于多个变量，可以使用 unique()

小开

There are a few ways to get all unique combinations of a set of factors.

# One label per row of df; the factor's levels are the combinations that
# actually occur (drop = TRUE discards unused ones).
with(df, interaction(yad, per, drop = TRUE))
# Factor crossing with `:`. It only works on factors, so coerce first:
# as.factor() leaves existing factors untouched and converts character
# columns (the data.frame() default since R 4.0).
with(df, as.factor(yad):as.factor(per))


# Data frame with one row per observed (yad, per) combination; column `x`
# holds the number of rows in each combination.
aggregate(numeric(nrow(df)), df[c("yad", "per")], length)

小开

最佳答案

How about using unique() itself?

# Toy data: two identifying columns plus a running counter.
df <- data.frame(
  yad = rep(c("BARBIE", "BAKUGAN"), each = 2),
  per = rep(c("AYLIK", "2 AYLIK"), each = 2),
  hmm = seq_len(4)
)


df
#       yad     per hmm
# 1  BARBIE   AYLIK   1
# 2  BARBIE   AYLIK   2
# 3 BAKUGAN 2 AYLIK   3
# 4 BAKUGAN 2 AYLIK   4


# Keep only the first occurrence of each (yad, per) pair.
unique(df[, c("yad", "per")])
#       yad     per
# 1  BARBIE   AYLIK
# 3 BAKUGAN 2 AYLIK

小开

This is an addition to Josh's answer.

You can also keep the values of other variables while filtering out duplicated rows in data.table

Example:

library(data.table)


# Create the example data table; V3 and V4 are constant columns.
dt <- data.table(
  V1 = LETTERS[c(1, 1, 1, 1, 2, 3, 3, 5, 7, 1)],
  V2 = LETTERS[c(2, 3, 4, 2, 1, 4, 4, 6, 7, 2)],
  V3 = 1,
  V4 = 2
)


dt
# V1 V2 V3 V4
# A  B  1  2
# A  C  1  2
# A  D  1  2
# A  B  1  2
# B  A  1  2
# C  D  1  2
# C  D  1  2
# E  F  1  2
# G  G  1  2
# A  B  1  2


# Set the key to all columns (this also sorts the rows by those columns).
setkey(dt)


# One row per (V1, V2) combination, keeping the other columns of the first
# matching row. The columns are named explicitly so the result does not depend
# on the default `by` of unique(), which has differed between data.table
# versions (key columns vs. all columns).
unique(dt, by = c("V1", "V2"))


# V1 V2 V3 V4
# A  B  1  2
# A  C  1  2
# A  D  1  2
# B  A  1  2
# C  D  1  2
# E  F  1  2
# G  G  1  2

Alert: If there are different combinations of values in the other variables, then your result will be the unique combinations of V1 and V2.

小开

This dplyr method works nicely when piping.

For selected columns:

library(dplyr)
# Transpose the two selected columns, flatten the resulting character matrix
# into a single vector, and return its distinct values (individual cell
# values, not distinct row combinations).
iris %>%
  select(Sepal.Width, Species) %>%
  t() %>%
  c() %>%
  unique()


[1] "3.5"        "setosa"     "3.0"        "3.2"        "3.1"
[6] "3.6"        "3.9"        "3.4"        "2.9"        "3.7"
[11] "4.0"        "4.4"        "3.8"        "3.3"        "4.1"
[16] "4.2"        "2.3"        "versicolor" "2.8"        "2.4"
[21] "2.7"        "2.0"        "2.2"        "2.5"        "2.6"
[26] "virginica"

Or for the whole dataframe:

# Same idea applied to every column: distinct cell values of the whole frame.
iris %>%
  t() %>%
  c() %>%
  unique()


[1] "5.1"        "3.5"        "1.4"        "0.2"        "setosa"     "4.9"
[7] "3.0"        "4.7"        "3.2"        "1.3"        "4.6"        "3.1"
[13] "1.5"        "5.0"        "3.6"        "5.4"        "3.9"        "1.7"
[19] "0.4"        "3.4"        "0.3"        "4.4"        "2.9"        "0.1"
[25] "3.7"        "4.8"        "1.6"        "4.3"        "1.1"        "5.8"
[31] "4.0"        "1.2"        "5.7"        "3.8"        "1.0"        "3.3"
[37] "0.5"        "1.9"        "5.2"        "4.1"        "5.5"        "4.2"
[43] "4.5"        "2.3"        "0.6"        "5.3"        "7.0"        "versicolor"
[49] "6.4"        "6.9"        "6.5"        "2.8"        "6.3"        "2.4"
[55] "6.6"        "2.7"        "2.0"        "5.9"        "6.0"        "2.2"
[61] "6.1"        "5.6"        "6.7"        "6.2"        "2.5"        "1.8"
[67] "6.8"        "2.6"        "virginica"  "7.1"        "2.1"        "7.6"
[73] "7.3"        "7.2"        "7.7"        "7.4"        "7.9"

小开

Get unique rows based on any chosen columns while keeping all other columns, using dplyr.

# Keep the first row for each (col1, col2) combination, retaining every
# other column of df.
df <- distinct(df, col1, col2, .keep_all = TRUE)

小开

This is an old question with many solutions.

Yet, in case you need unique observations based on a selection of columns while also keeping all other columns in the dataframe, you can do it in a clean way using base R as follows:

# Keep the first row for each (X, Y) combination and all other columns.
# Indexing with the logical vector directly avoids leaving a helper column
# in df; drop = FALSE guarantees the result stays a data frame.
df <- df[!duplicated(df[c("X", "Y")]), , drop = FALSE]

The alternative is using 'distinct' as proposed by @micahkimel