将逗号分隔的字符串转换为单独的行

我有一个这样的SQL表:

| SomeID         | OtherID     | Data
+----------------+-------------+-------------------
| abcdef-.....   | cdef123-... | 18,20,22
| abcdef-.....   | 4554a24-... | 17,19
| 987654-.....   | 12324a2-... | 13,19,20

是否有一个查询,我可以执行像SELECT OtherID, SplitData WHERE SomeID = 'abcdef-.......'这样的查询,返回单独的行,就像这样:

| OtherID     | SplitData
+-------------+-------------------
| cdef123-... | 18
| cdef123-... | 20
| cdef123-... | 22
| 4554a24-... | 17
| 4554a24-... | 19

基本上把我的数据在逗号处分割成单独的行?

我知道将comma-separated字符串存储到关系数据库中听起来很愚蠢,但消费者应用程序中的正常用例使这非常有用。

我不想在应用程序中进行拆分,因为我需要分页,所以我想在重构整个应用程序之前探索选项。

它是SQL Server 2008(非r2)。

643419 次浏览

你可以使用SQL Server中的递归函数:


示例表:

CREATE TABLE Testdata
(
SomeID INT,
OtherID INT,
String VARCHAR(MAX)
);


INSERT Testdata SELECT 1,  9, '18,20,22';
INSERT Testdata SELECT 2,  8, '17,19';
INSERT Testdata SELECT 3,  7, '13,19,20';
INSERT Testdata SELECT 4,  6, '';
INSERT Testdata SELECT 9, 11, '1,2,3,4';

查询

WITH tmp(SomeID, OtherID, DataItem, String) AS
(
SELECT
SomeID,
OtherID,
LEFT(String, CHARINDEX(',', String + ',') - 1),
STUFF(String, 1, CHARINDEX(',', String + ','), '')
FROM Testdata
UNION all


SELECT
SomeID,
OtherID,
LEFT(String, CHARINDEX(',', String + ',') - 1),
STUFF(String, 1, CHARINDEX(',', String + ','), '')
FROM tmp
WHERE
String > ''
)
SELECT
SomeID,
OtherID,
DataItem
FROM tmp
ORDER BY SomeID;
-- OPTION (maxrecursion 0)
-- normally recursion is limited to 100. If you know you have very long
-- strings, uncomment the option

输出

 SomeID | OtherID | DataItem
--------+---------+----------
1      | 9       | 18
1      | 9       | 20
1      | 9       | 22
2      | 8       | 17
2      | 8       | 19
3      | 7       | 13
3      | 7       | 19
3      | 7       | 20
4      | 6       |
9      | 11      | 1
9      | 11      | 2
9      | 11      | 3
9      | 11      | 4
;WITH tmp(SomeID, OtherID, DataItem, Data) as (
SELECT SomeID, OtherID, LEFT(Data, CHARINDEX(',',Data+',')-1),
STUFF(Data, 1, CHARINDEX(',',Data+','), '')
FROM Testdata
WHERE Data > ''
)
SELECT SomeID, OtherID, Data
FROM tmp
ORDER BY SomeID

仅对上述查询进行了微小的修改…

检查这个

 SELECT A.OtherID,
Split.a.value('.', 'VARCHAR(100)') AS Data
FROM
(
SELECT OtherID,
CAST ('<M>' + REPLACE(Data, ',', '</M><M>') + '</M>' AS XML) AS Data
FROM  Table1
) AS A CROSS APPLY Data.nodes ('/M') AS Split(a);
DECLARE @id_list VARCHAR(MAX) = '1234,23,56,576,1231,567,122,87876,57553,1216';
DECLARE @table TABLE ( id VARCHAR(50) );
DECLARE @x INT = 0;
DECLARE @firstcomma INT = 0;
DECLARE @nextcomma INT = 0;


SET @x = LEN(@id_list) - LEN(REPLACE(@id_list, ',', '')) + 1; -- number of ids in id_list


WHILE @x > 0
BEGIN
SET @nextcomma = CASE WHEN CHARINDEX(',', @id_list, @firstcomma + 1) = 0
THEN LEN(@id_list) + 1
ELSE CHARINDEX(',', @id_list, @firstcomma + 1)
END;
INSERT  INTO @table
VALUES  ( SUBSTRING(@id_list, @firstcomma + 1, (@nextcomma - @firstcomma) - 1) );
SET @firstcomma = CHARINDEX(',', @id_list, @firstcomma + 1);
SET @x = @x - 1;
END;


SELECT  *
FROM    @table;

截至2016年2月-见talltytable示例-很可能从2014年2月开始超过我的TVF。为子孙后代保留以下原始帖子:


上面的例子中重复的代码太多了。我不喜欢cte和XML的性能。另外,显式的Id,以便特定于订单的消费者可以指定ORDER BY子句。

CREATE FUNCTION dbo.Split
(
@Line nvarchar(MAX),
@SplitOn nvarchar(5) = ','
)
RETURNS @RtnValue table
(
Id INT NOT NULL IDENTITY(1,1) PRIMARY KEY CLUSTERED,
Data nvarchar(100) NOT NULL
)
AS
BEGIN
IF @Line IS NULL RETURN;


DECLARE @split_on_len INT = LEN(@SplitOn);
DECLARE @start_at INT = 1;
DECLARE @end_at INT;
DECLARE @data_len INT;


WHILE 1=1
BEGIN
SET @end_at = CHARINDEX(@SplitOn,@Line,@start_at);
SET @data_len = CASE @end_at WHEN 0 THEN LEN(@Line) ELSE @end_at-@start_at END;
INSERT INTO @RtnValue (data) VALUES( SUBSTRING(@Line,@start_at,@data_len) );
IF @end_at = 0 BREAK;
SET @start_at = @end_at + @split_on_len;
END;


RETURN;
END;
select t.OtherID,x.Kod
from testData t
cross apply (select Code from dbo.Split(t.Data,',') ) x

当使用这种方法时,您必须确保您的值中没有包含非法XML - user1151923

我总是使用XML方法。确保使用VALID XML。我有两个函数在有效的XML和文本之间转换。(我倾向于去掉回车,因为我通常不需要它们。

CREATE FUNCTION dbo.udf_ConvertTextToXML (@Text varchar(MAX))
RETURNS varchar(MAX)
AS
BEGIN
SET @Text = REPLACE(@Text,CHAR(10),'');
SET @Text = REPLACE(@Text,CHAR(13),'');
SET @Text = REPLACE(@Text,'<','&lt;');
SET @Text = REPLACE(@Text,'&','&amp;');
SET @Text = REPLACE(@Text,'>','&gt;');
SET @Text = REPLACE(@Text,'''','&apos;');
SET @Text = REPLACE(@Text,'"','&quot;');
RETURN @Text;
END;




CREATE FUNCTION dbo.udf_ConvertTextFromXML (@Text VARCHAR(MAX))
RETURNS VARCHAR(max)
AS
BEGIN
SET @Text = REPLACE(@Text,'&lt;','<');
SET @Text = REPLACE(@Text,'&amp;','&');
SET @Text = REPLACE(@Text,'&gt;','>');
SET @Text = REPLACE(@Text,'&apos;','''');
SET @Text = REPLACE(@Text,'&quot;','"');
RETURN @Text;
END;

最后,使用SQL Server 2016结束等待。他们引入了Split string函数< >强STRING_SPLIT < / >强:

select OtherID, cs.Value --SplitData
from yourtable
cross apply STRING_SPLIT (Data, ',') cs

所有其他方法来分割字符串,如XML, tallytable, while循环等。已经被STRING_SPLIT函数破坏了。

下面是一篇性能比较的优秀文章:性能惊喜和假设:STRING_SPLIT .

对于旧版本,在这里使用统计数据表是一个分割字符串函数(最好的方法)

CREATE FUNCTION [dbo].[DelimitedSplit8K]
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 0 up to 10,000...
-- enough to cover NVARCHAR(4000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),                          --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item       = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;

引用自统计哦!改进的SQL 8K“CSV分配器”函数

很高兴看到这个问题已经在2016年的版本中解决了,但对于所有没有解决的问题,这里有上述方法的两个广义和简化版本。

xml方法更短,但当然需要字符串来允许xml技巧(没有'坏'字符)。

xml函数:

create function dbo.splitString(@input Varchar(max), @Splitter VarChar(99)) returns table as
Return
SELECT Split.a.value('.', 'VARCHAR(max)') AS Data FROM
( SELECT CAST ('<M>' + REPLACE(@input, @Splitter, '</M><M>') + '</M>' AS XML) AS Data
) AS A CROSS APPLY Data.nodes ('/M') AS Split(a);

递归方法:

create function dbo.splitString(@input Varchar(max), @Splitter Varchar(99)) returns table as
Return
with tmp (DataItem, ix) as
( select @input  , CHARINDEX('',@Input)  --Recu. start, ignored val to get the types right
union all
select Substring(@input, ix+1,ix2-ix-1), ix2
from (Select *, CHARINDEX(@Splitter,@Input+@Splitter,ix+1) ix2 from tmp) x where ix2<>0
) select DataItem from tmp where ix<>0

发挥作用

Create table TEST_X (A int, CSV Varchar(100));
Insert into test_x select 1, 'A,B';
Insert into test_x select 2, 'C,D';


Select A,data from TEST_X x cross apply dbo.splitString(x.CSV,',') Y;


Drop table TEST_X

XML-METHOD 2: Unicode友好的😀(由Max Hodges提供) 创建函数dbo。splitString(@input nVarchar(max), @Splitter nVarchar(99))返回表为 返回 选择Split.a.value(”。', 'NVARCHAR(max)') AS数据来源 (选择演员(' & lt; M> ' +取代(@input @Splitter,‘& lt; / M> & lt; M>”)+ ' & lt; / M> '为XML)数据 )作为交叉应用数据。nodes ('/M') AS Split(a); < /代码> < / p >

以下工作在sql server 2008

select *, ROW_NUMBER() OVER(order by items) as row#
from
( select 134 myColumn1, 34 myColumn2, 'd,c,k,e,f,g,h,a' comaSeperatedColumn) myTable
cross apply
SPLIT (rtrim(comaSeperatedColumn), ',') splitedTable -- gives 'items'  column

将得到所有的笛卡尔积与原表列加上“项目”的分割表。

函数

CREATE FUNCTION dbo.SplitToRows (@column varchar(100), @separator varchar(10))
RETURNS @rtnTable TABLE
(
ID int identity(1,1),
ColumnA varchar(max)
)
AS
BEGIN
DECLARE @position int = 0;
DECLARE @endAt int = 0;
DECLARE @tempString varchar(100);
    

set @column = ltrim(rtrim(@column));


WHILE @position<=len(@column)
BEGIN
set @endAt = CHARINDEX(@separator,@column,@position);
if(@endAt=0)
begin
Insert into @rtnTable(ColumnA) Select substring(@column,@position,len(@column)-@position);
break;
end;
set @tempString = substring(ltrim(rtrim(@column)),@position,@endAt-@position);


Insert into @rtnTable(ColumnA) select @tempString;
set @position=@endAt+1;
END;
return;
END;

用例

select * from dbo.SplitToRows('T14; p226.0001; eee; 3554;', ';');

或者只是一个有多个结果集的选择

DECLARE @column varchar(max)= '1234; 4748;abcde; 324432';
DECLARE @separator varchar(10) = ';';
DECLARE @position int = 0;
DECLARE @endAt int = 0;
DECLARE @tempString varchar(100);


set @column = ltrim(rtrim(@column));


WHILE @position<=len(@column)
BEGIN
set @endAt = CHARINDEX(@separator,@column,@position);
if(@endAt=0)
begin
Select substring(@column,@position,len(@column)-@position);
break;
end;
set @tempString = substring(ltrim(rtrim(@column)),@position,@endAt-@position);


select @tempString;
set @position=@endAt+1;
END;

请参考下面的TSQL。STRING_SPLIT函数仅在兼容性级别130及更高级别下可用。

TSQL:

DECLARE @stringValue NVARCHAR(400) = 'red,blue,green,yellow,black';
DECLARE @separator CHAR = ',';


SELECT [value]  As Colour
FROM STRING_SPLIT(@stringValue, @separator);

结果:

颜色

< p >红 蓝色的 绿色 黄色的 黑色< / p >

很晚了,但是试试这个:

SELECT ColumnID, Column1, value  --Do not change 'value' name. Leave it as it is.
FROM tbl_Sample
CROSS APPLY STRING_SPLIT(Tags, ','); --'Tags' is the name of column containing comma separated values
所以我们有这个: tbl_Sample: < / p >
ColumnID|   Column1 |   Tags
--------|-----------|-------------
1       |   ABC     |   10,11,12
2       |   PQR     |   20,21,22

运行此查询后:

ColumnID|   Column1 |   value
--------|-----------|-----------
1       |   ABC     |   10
1       |   ABC     |   11
1       |   ABC     |   12
2       |   PQR     |   20
2       |   PQR     |   21
2       |   PQR     |   22

谢谢!

可以使用以下函数提取数据

CREATE FUNCTION [dbo].[SplitString]
(
@RowData NVARCHAR(MAX),
@Delimeter NVARCHAR(MAX)
)
RETURNS @RtnValue TABLE
(
ID INT IDENTITY(1,1),
Data NVARCHAR(MAX)
)
AS
BEGIN
DECLARE @Iterator INT;
SET @Iterator = 1;


DECLARE @FoundIndex INT;
SET @FoundIndex = CHARINDEX(@Delimeter,@RowData);


WHILE (@FoundIndex>0)
BEGIN
INSERT INTO @RtnValue (data)
SELECT
Data = LTRIM(RTRIM(SUBSTRING(@RowData, 1, @FoundIndex - 1)));


SET @RowData = SUBSTRING(@RowData,
@FoundIndex + DATALENGTH(@Delimeter) / 2,
LEN(@RowData));


SET @Iterator = @Iterator + 1;
SET @FoundIndex = CHARINDEX(@Delimeter, @RowData);
END;
    

INSERT INTO @RtnValue (Data)
SELECT Data = LTRIM(RTRIM(@RowData));


RETURN;
END;

通过创建这个分割字符串的函数([DelimitedSplit]),可以对SELECT执行OUTER APPLY。

CREATE FUNCTION [dbo].[DelimitedSplit]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE!  IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),                          --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a INNER JOIN E1 b ON b.N = a.N), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a INNER JOIN E2 b ON b.N = a.N), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item       = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;

测试

CREATE TABLE #Testdata
(
SomeID INT,
OtherID INT,
String VARCHAR(MAX)
);


INSERT #Testdata SELECT 1,  9, '18,20,22';
INSERT #Testdata SELECT 2,  8, '17,19';
INSERT #Testdata SELECT 3,  7, '13,19,20';
INSERT #Testdata SELECT 4,  6, '';
INSERT #Testdata SELECT 9, 11, '1,2,3,4';


SELECT
*
FROM #Testdata
OUTER APPLY [dbo].[DelimitedSplit](String,',');


DROP TABLE #Testdata;

结果

SomeID  OtherID String      ItemNumber  Item
1       9       18,20,22    1           18
1       9       18,20,22    2           20
1       9       18,20,22    3           22
2       8       17,19       1           17
2       8       17,19       2           19
3       7       13,19,20    1           13
3       7       13,19,20    2           19
3       7       13,19,20    3           20
4       6       1
9       11      1,2,3,4     1           1
9       11      1,2,3,4     2           2
9       11      1,2,3,4     3           3
9       11      1,2,3,4     4           4

我知道它有很多答案,但我想写我的版本的分裂函数像其他人和像string_split SQL Server 2016本机函数。

create function [dbo].[Split]
(
@Value nvarchar(max),
@Delimiter nvarchar(50)
)
returns @tbl table
(
Seq int primary key identity(1, 1),
Value nvarchar(max)
)
as begin
declare @Xml xml = cast('<d>' + replace(@Value, @Delimiter, '</d><d>') + '</d>' as xml);


insert into @tbl
(Value)
select  a.split.value('.', 'nvarchar(max)') as Value
from    @Xml.nodes('/d') a(split);
    

return;
end;
  • Seq列是支持快速连接其他实表或Split函数返回表的主键。
  • 使用XML函数支持大数据(当你有大数据时,循环版本会显著变慢)

这是问题的答案。

CREATE TABLE Testdata
(
SomeID INT,
OtherID INT,
String VARCHAR(MAX)
);


INSERT Testdata SELECT 1,  9, '18,20,22';
INSERT Testdata SELECT 2,  8, '17,19';
INSERT Testdata SELECT 3,  7, '13,19,20';
INSERT Testdata SELECT 4,  6, '';
INSERT Testdata SELECT 9, 11, '1,2,3,4';




select  t.SomeID, t.OtherID, s.Value
from    Testdata t
cross apply dbo.Split(t.String, ',') s;


--Output
SomeID  OtherID Value
1       9       18
1       9       20
1       9       22
2       8       17
2       8       19
3       7       13
3       7       19
3       7       20
4       6
9       11      1
9       11      2
9       11      3
9       11      4

加入Split与其他Split

declare @Names nvarchar(max) = 'a,b,c,d';
declare @Codes nvarchar(max) = '10,20,30,40';


select  n.Seq, n.Value Name, c.Value Code
from    dbo.Split(@Names, ',') n
inner join dbo.Split(@Codes, ',') c on n.Seq = c.Seq;


--Output
Seq Name    Code
1   a       10
2   b       20
3   c       30
4   d       40

分开两次

declare @NationLocSex nvarchar(max) = 'Korea,Seoul,1;Vietnam,Kiengiang,0;China,Xian,0';


with rows as
(
select  Value
from    dbo.Split(@NationLocSex, ';')
)
select  rw.Value r, cl.Value c
from    rows rw
cross apply dbo.Split(rw.Value, ',') cl;


--Output
r                       c
Korea,Seoul,1           Korea
Korea,Seoul,1           Seoul
Korea,Seoul,1           1
Vietnam,Kiengiang,0     Vietnam
Vietnam,Kiengiang,0     Kiengiang
Vietnam,Kiengiang,0     0
China,Xian,0            China
China,Xian,0            Xian
China,Xian,0            0

分割成列

declare @Numbers nvarchar(50) = 'First,Second,Third';


with t as
(
select  case when Seq = 1 then Value end f1,
case when Seq = 2 then Value end f2,
case when Seq = 3 then Value end f3
from    dbo.Split(@Numbers, ',')
)
select  min(f1) f1, min(f2) f2, min(f3) f3
from    t;


--Output
f1      f2      f3
First   Second  Third

按范围生成行


declare @Ranges nvarchar(50) = '1-2,4-6';


declare @Numbers table (Num int);
insert into @Numbers values (1),(2),(3),(4),(5),(6),(7),(8);


with t as
(
select  r.Seq, r.Value,
min(case when ft.Seq = 1 then ft.Value end) ValueFrom,
min(case when ft.Seq = 2 then ft.Value end) ValueTo
from    dbo.Split(@Ranges, ',') r
cross apply dbo.Split(r.Value, '-') ft
group by r.Seq, r.Value
)
select  t.Seq, t.Value, t.ValueFrom, t.ValueTo, n.Num
from    t
inner join @Numbers n on n.Num between t.ValueFrom and t.ValueTo;


--Output
Seq Value   ValueFrom   ValueTo Num
1   1-2     1           2       1
1   1-2     1           2       2
2   4-6     4           6       4
2   4-6     4           6       5
2   4-6     4           6       6