我的客户端发送给我的名称数据作为一个name字符串,其中包括在单个条目中的姓、名和中名。我需要将它们拆分为LastName、FirstName和midlename。我在网上找到了一些脚本,但它们不能满足我的目的,因为它们要么(1)使用不同的格式,要么(2)不能很好地处理边缘情况。请看下面的例子:
- 南丁格尔,佛罗伦萨->佛罗伦萨南丁格尔
- 邦德,詹姆斯·邦德->詹姆斯·邦德邦德
- Abbott, Edwin A. -> Edwin A. Abbott
有人能帮我写一个SQL Server脚本,将字符串分成我正在寻找的各个部分?
请注意以下事项:
- 始终请求规范化数据以确保最高的数据质量。我试着列举出最后、第一和中间名组合的所有可能情况,但我确定我没有得到所有的情况。
- 我的脚本需要的格式:LastName@DELIMITER1@DELIMITER2FirstName@DELIMITER2MiddleName,但可以很容易地更改为其他格式。
- 这个脚本不像Dr.那样分隔贴图,也不处理后缀。 我扩展了测试数据集的想法,这要归功于MemKills。
>
DECLARE @DELIMITER1 varchar(1), @DELIMITER2 varchar(1), @MAX_LENGTH int
SET @DELIMITER1 = ','
SET @DELIMITER2 = ' '
SET @MAX_LENGTH = 50
SELECT [Name],
SUBSTRING(Name,1,CHARINDEX(@DELIMITER1,Name) -1) AS LastName, -- Less one char for @DELIMITER1
SUBSTRING(Name,CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH) AS FirstAndMiddle, -- Plus two for @DELIMITER1 and @DELIMITER2
CASE
-- Middle name follows two-name first names like Mary Ann
WHEN LEN(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH), @DELIMITER2, '')) > 0
THEN SUBSTRING(Name, LEN(Name) - CHARINDEX(@DELIMITER2, REVERSE(Name))+2, @MAX_LENGTH)
ELSE NULL
END AS MiddleName,
CASE
-- Count the number of @DELIMITER2. Choose the string between the @DELIMITER1 and the final @DELIMITER2.
WHEN LEN(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH), @DELIMITER2, '')) > 0
Then SUBSTRING(Name, CHARINDEX(@DELIMITER1,Name)+ 2,
(LEN(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH))
- LEN(SUBSTRING(Name, LEN(Name) - CHARINDEX(@DELIMITER2, REVERSE(Name))+2, @MAX_LENGTH))))
ELSE SUBSTRING(Name,CHARINDEX(@DELIMITER1,Name)+ 2,@MAX_LENGTH)
END AS FirstName
FROM
(
SELECT [Name] = 'Zzz, A' UNION ALL
SELECT 'de Zzz, Aaa' UNION ALL
SELECT 'Zzz, Aaaa' UNION ALL
SELECT 'Zzz, A B' UNION ALL
SELECT 'Zzz, Aaaa Bbbb' UNION ALL
SELECT 'de Zzz, Aaaa' UNION ALL
SELECT 'de Zzz, Aaaa B' UNION ALL
SELECT 'van Zzz, Aaaa B' UNION ALL
SELECT 'Yyy-Zzz, Aaaa B' UNION ALL
SELECT 'd''Zzz, Aaaa B' UNION ALL
SELECT 'Zzz, Aaaa Bbbb C' UNION ALL
SELECT 'Zzz, Aaaa Bbbb Cccc'
) AS X
试试下面的代码。我觉得这样更有效率一点。请随意修改或改进。谢谢。
DECLARE @FullName VARCHAR(60),
@FirstName VARCHAR(30),
@LastName VARCHAR(30),
@MiddleInitialPrep VARCHAR(60) = null,
@MiddleInitial VARCHAR(1) = null
SET @FullName = 'Dr. John Edward Doe III'
-- NAME CLEAN UP TO REMOVE PREFIXES AND SUFFIXES
SET @FullName = REPLACE(@FullName, 'Mr. ', '')
SET @FullName = REPLACE(@FullName, 'Mr ', '')
SET @FullName = REPLACE(@FullName, 'Mrs. ', '')
SET @FullName = REPLACE(@FullName, 'Mrs ', '')
SET @FullName = REPLACE(@FullName, 'Ms. ', '')
SET @FullName = REPLACE(@FullName, 'Ms ', '')
SET @FullName = REPLACE(@FullName, 'Miss ', '')
SET @FullName = REPLACE(@FullName, 'Dr. ', '')
SET @FullName = REPLACE(@FullName, 'Dr ', '')
SET @FullName = REPLACE(@FullName, ' Jr.', '')
SET @FullName = REPLACE(@FullName, ' Jr', '')
SET @FullName = REPLACE(@FullName, ' Sr.', '')
SET @FullName = REPLACE(@FullName, ' Sr', '')
SET @FullName = REPLACE(@FullName, ' III', '')
SET @FullName = REPLACE(@FullName, ' II', '')
-- RETRIEVE FIRST AND LAST NAMES
SET @FirstName = LEFT(@FullName, NULLIF(CHARINDEX(' ', @FullName) - 1, -1))
SET @LastName = RIGHT(@FullName, ISNULL(NULLIF(CHARINDEX(' ', REVERSE(@FullName)) - 1, -1), LEN(@FullName)))
-- ISOLATE MIDDLE INITIAL
SET @MiddleInitialPrep = REPLACE(@FullName, @FirstName, '')
SET @MiddleInitialPrep = REPLACE(@MiddleInitialPrep, @LastName, '')
SET @MiddleInitial = REPLACE(@MiddleInitialPrep, ' ', '')
SELECT @FirstName First_Name, @MiddleInitial Middle_Initial, @LastName Last_Name
下面的代码适用于Last, First M名称字符串。将"Name"替换为名称字符串列名。因为当中间有一个首字母时,最后一个字符是句号,所以可以将每行(2,6和8)中的2替换为3,并将第8行中的"RIGHT(Name, 1)"更改为"RIGHT(Name, 2)"。
SELECT SUBSTRING(Name, 1, CHARINDEX(',', Name) - 1) LastName ,
CASE WHEN LEFT(RIGHT(Name, 2), 1) <> ' '
THEN LTRIM(SUBSTRING(Name, CHARINDEX(',', Name) + 1, 99))
ELSE LEFT(LTRIM(SUBSTRING(Name, CHARINDEX(',', Name) + 1, 99)),
LEN(LTRIM(SUBSTRING(Name, CHARINDEX(',', Name) + 1, 99)))
- 2)
END FirstName ,
CASE WHEN LEFT(RIGHT(Name, 2), 1) = ' ' THEN RIGHT(Name, 1)
ELSE NULL
END MiddleName
好办法。我做了一些修改来适应我的情况,其中分隔符是空格,中间名只是中间的首字母(有时不存在)。下面的解决方案甚至可以解析多间隔的名字,例如:"Jo Ann Taylor Haynes",没有中间的首字母。
SET @DELIMITER1 = ' '
SET @DELIMITER2 = ' '
SET @MAX_LENGTH = 50
SELECT [Name],
SUBSTRING(Name,1,CHARINDEX(@DELIMITER1,Name) -1) AS LastName,
SUBSTRING(Name,CHARINDEX(@DELIMITER1,Name)+ 1,@MAX_LENGTH) AS FirstAndMiddle,
CASE
WHEN LEN(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 1,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 1,@MAX_LENGTH), @DELIMITER2, '')) = 1
THEN SUBSTRING(Name, LEN(Name) - CHARINDEX(@DELIMITER2, REVERSE(Name))+1, @MAX_LENGTH)
ELSE NULL
END AS MiddleName,
CASE
WHEN LEN(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 1,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 1,@MAX_LENGTH), @DELIMITER2, '')) = 1
Then SUBSTRING(Name, CHARINDEX(@DELIMITER1,Name)+ 1,
(LEN(SUBSTRING(NAME, CHARINDEX(@DELIMITER1,Name)+ 1,@MAX_LENGTH))
- LEN(SUBSTRING(Name, LEN(Name) - CHARINDEX(@DELIMITER2, REVERSE(Name))+1, @MAX_LENGTH))))
ELSE SUBSTRING(Name,CHARINDEX(@DELIMITER1,Name)+ 1,@MAX_LENGTH)
END AS FirstName
select substr(
'santhosh kumar kota'
,1,
instr(
'santhosh kumar kota'
,' '
,1
,1)
) as fname
,substr('santhosh kumar kota'
,instr(
'santhosh kumar kota'
,' '
,1
,1)
,(instr(
'santhosh kumar kota'
,' '
,1
,2)-instr(
'santhosh kumar kota'
,' '
,1
,1)
)
)as mname
,substr('santhosh kumar kota'
,instr(
'santhosh kumar kota'
,' '
,1
,2)
,(length('santhosh kumar kota')+1)-instr(
'santhosh kumar kota'
,' '
,1
,2)
)as lname
from dual
/