CREATE FUNCTION [dbo].[fn_GetPYFirstLetter]
(
@str NVARCHAR(4000)
)
RETURNS NVARCHAR(4000)
AS
BEGIN
DECLARE @py TABLE (
ch char(2) COLLATE Chinese_PRC_CI_AS NOT NULL,
py nchar(1) COLLATE Chinese_PRC_CI_AS NULL
)
-- 插入常用汉字拼音首字母对照表(部分示例)
INSERT INTO @py(ch, py)
VALUES
('吖','A'),('八','B'),('嚓','C'),('咑','D'),('鵽','E'),
('发','F'),('旮','G'),('哈','H'),('丌','J'),('咔','K'),
('垃','L'),('嘸','M'),('拏','N'),('噢','O'),('妑','P'),
('七','Q'),('冄','R'),('仨','S'),('他','T'),('屲','W'),
('夕','X'),('丫','Y'),('帀','Z'),
-- 更多汉字可以继续添加...
('阿','A'),('芭','B'),('擦','C'),('搭','D'),('蛾','E'),
('发','F'),('噶','G'),('哈','H'),('击','J'),('喀','K'),
('垃','L'),('妈','M'),('拿','N'),('哦','O'),('啪','P'),
('期','Q'),('然','R'),('撒','S'),('塌','T'),('挖','W'),
('西','X'),('压','Y'),('杂','Z')
DECLARE @result NVARCHAR(4000) = ''
DECLARE @i INT = 1
DECLARE @len INT = LEN(@str)
WHILE @i <= @len
BEGIN
DECLARE @ch NCHAR(1) = SUBSTRING(@str, @i, 1)
IF @ch >= N'吖' AND @ch <= N'座' -- 汉字范围
BEGIN
-- 获取拼音首字母
SELECT @result = @result + ISNULL(py, @ch)
FROM @py
WHERE ch >= @ch COLLATE Chinese_PRC_CI_AS
ORDER BY ch
OFFSET 0 ROWS FETCH NEXT 1 ROWS ONLY
END
ELSE IF @ch BETWEEN 'a' AND 'z' OR @ch BETWEEN 'A' AND 'Z'
BEGIN
-- 英文字母直接取大写
SET @result = @result + UPPER(@ch)
END
ELSE
BEGIN
-- 其他字符保持不变
SET @result = @result + @ch
END
SET @i = @i + 1
END
RETURN @result
END
GO
CREATE FUNCTION [dbo].[fn_GetChineseFirstLetter]
(
@str NVARCHAR(4000)
)
RETURNS NVARCHAR(4000)
AS
BEGIN
DECLARE @result NVARCHAR(4000) = ''
DECLARE @i INT = 1
DECLARE @len INT = LEN(@str)
WHILE @i <= @len
BEGIN
DECLARE @ch NCHAR(1) = SUBSTRING(@str, @i, 1)
DECLARE @asc INT = UNICODE(@ch)
-- 判断是否为汉字
IF @asc >= 19968 AND @asc <= 40869 -- 汉字UNICODE范围
BEGIN
-- 根据拼音首字母分区判断
SET @result = @result +
CASE
WHEN @ch >= N'吖' AND @ch <= N'驁' THEN 'A'
WHEN @ch >= N'八' AND @ch <= N'簿' THEN 'B'
WHEN @ch >= N'嚓' AND @ch <= N'錯' THEN 'C'
WHEN @ch >= N'咑' AND @ch <= N'鵽' THEN 'D'
WHEN @ch >= N'妸' AND @ch <= N'樲' THEN 'E'
WHEN @ch >= N'发' AND @ch <= N'猤' THEN 'F'
WHEN @ch >= N'旮' AND @ch <= N'腂' THEN 'G'
WHEN @ch >= N'哈' AND @ch <= N'夻' THEN 'H'
WHEN @ch >= N'丌' AND @ch <= N'攈' THEN 'J'
WHEN @ch >= N'咔' AND @ch <= N'穒' THEN 'K'
WHEN @ch >= N'垃' AND @ch <= N'擽' THEN 'L'
WHEN @ch >= N'嘸' AND @ch <= N'椧' THEN 'M'
WHEN @ch >= N'拏' AND @ch <= N'瘧' THEN 'N'
WHEN @ch >= N'噢' AND @ch <= N'漚' THEN 'O'
WHEN @ch >= N'妑' AND @ch <= N'曝' THEN 'P'
WHEN @ch >= N'七' AND @ch <= N'裠' THEN 'Q'
WHEN @ch >= N'亽' AND @ch <= N'鶸' THEN 'R'
WHEN @ch >= N'仨' AND @ch <= N'蜶' THEN 'S'
WHEN @ch >= N'他' AND @ch <= N'籜' THEN 'T'
WHEN @ch >= N'屲' AND @ch <= N'鶩' THEN 'W'
WHEN @ch >= N'夕' AND @ch <= N'鑂' THEN 'X'
WHEN @ch >= N'丫' AND @ch <= N'韻' THEN 'Y'
WHEN @ch >= N'帀' AND @ch <= N'咗' THEN 'Z'
ELSE @ch
END
END
ELSE IF (@ch >= 'a' AND @ch <= 'z') OR (@ch >= 'A' AND @ch <= 'Z')
BEGIN
SET @result = @result + UPPER(@ch)
END
ELSE
BEGIN
SET @result = @result + @ch
END
SET @i = @i + 1
END
RETURN @result
END
GO
如果对性能要求高,可以使用 SQL CLR 集成:
创建 C# 类库项目:
using System;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
public class ChineseHelper { [SqlFunction] public static SqlString GetFirstLetter(SqlString input) { if (input.IsNull) return SqlString.Null;
string str = input.Value;
char[] chars = str.ToCharArray();
for (int i = 0; i < chars.Length; i++)
{
chars[i] = GetPinyin(chars[i]);
}
return new SqlString(new string(chars));
}
private static char GetPinyin(char ch)
{
// 使用成熟的拼音库,如 NPinyin
// 这里简化为返回首字母
if (ch >= 0x4E00 && ch <= 0x9FA5) // 汉字范围
{
// 这里应该调用拼音转换库
// 暂时返回 'A' 作为示例
return 'A';
}
return Char.ToUpper(ch);
}
}
2. **在 SQL Server 中注册程序集和函数**:
```sql
-- 启用 CLR
EXEC sp_configure 'clr enabled', 1;
RECONFIGURE;
-- 创建程序集
CREATE ASSEMBLY ChineseFunctions
FROM 'C:\path\to\ChineseHelper.dll'
WITH PERMISSION_SET = SAFE;
-- 创建函数
CREATE FUNCTION dbo.fn_GetFirstLetterCLR(@str NVARCHAR(MAX))
RETURNS NVARCHAR(MAX)
AS EXTERNAL NAME ChineseFunctions.ChineseHelper.GetFirstLetter;
-- 使用自定义函数
SELECT dbo.fn_GetChineseFirstLetter(N'张三李四') -- 返回:ZSLS
SELECT dbo.fn_GetChineseFirstLetter(N'北京上海') -- 返回:BJSH
SELECT dbo.fn_GetChineseFirstLetter(N'Hello 世界') -- 返回:HELLO SJ
准确性:上述简单方法可能无法覆盖所有汉字,特别是多音字
性能:对于大量数据处理,方案三(CLR)性能最好
维护:如果需要精确的拼音首字母,建议:
多音字处理:上述简单方案无法处理多音字,如:
选择哪种方案取决于你的具体需求、数据量和准确性要求。