C# 清理(sanitize)文件名

最近,我一直在将一堆mp3从不同的位置转移到一个存储库中。我一直在使用ID3标记构造新文件名(谢谢,TagLib-Sharp!),我注意到我得到了System.NotSupportedException:

"不支持给定路径的格式。"

该异常是由 File.Copy() 或 Directory.CreateDirectory() 抛出的。

没过多久我就意识到我的文件名需要清理。所以我做了一件显而易见的事:

/// <summary>
/// Replaces characters that are invalid in the directory part and in the
/// file-name part of <paramref name="path"/> with <paramref name="replaceChar"/>.
/// </summary>
/// <param name="path">Path whose directory and file name are cleaned separately.</param>
/// <param name="replaceChar">Character substituted for every invalid character.</param>
/// <returns>
/// The cleaned directory combined with the cleaned file name, or
/// <paramref name="path"/> unchanged when it has no directory information
/// (<see cref="Path.GetDirectoryName(string)"/> returned null).
/// </returns>
/// <remarks>
/// The directory part is only checked against <see cref="Path.GetInvalidPathChars"/>,
/// so a character that is absent from that set is left alone there.
/// </remarks>
public static string SanitizePath_(string path, char replaceChar)
{
    string dir = Path.GetDirectoryName(path);
    if (dir == null)
    {
        // Nothing to split: avoid dereferencing a null directory below.
        return path;
    }

    foreach (char c in Path.GetInvalidPathChars())
    {
        dir = dir.Replace(c, replaceChar);
    }

    string name = Path.GetFileName(path);
    foreach (char c in Path.GetInvalidFileNameChars())
    {
        name = name.Replace(c, replaceChar);
    }

    // GetDirectoryName drops the trailing separator, so plain concatenation
    // would glue the last directory onto the file name.
    return Path.Combine(dir, name);
}

令我惊讶的是,我继续得到异常。结果证明':'不在Path.GetInvalidPathChars()的集合中,因为它在路径根中是有效的。我想这是有道理的-但这必须是一个相当普遍的问题。谁有一些简短的代码来清除路径?这是我想出的最彻底的方法,但感觉可能有点过头了。

    // Replaces invalid characters in every segment of path with replaceChar.
    // The path root is kept as-is and the segments are re-joined through
    // Utility.Join using the directory separator.
    public static string SanitizePath(string path, char replaceChar)
    {
        // Build the forbidden-character list on first use. Both sets are merged
        // because ":" is not part of the invalid *path* characters.
        if (_BadChars == null)
        {
            _BadChars = new List<char>(Path.GetInvalidFileNameChars());
            _BadChars.AddRange(Path.GetInvalidPathChars());
            _BadChars = Utility.GetUnique<char>(_BadChars);
        }

        // Detach the root so that its characters are never rewritten.
        string rootPart = Path.GetPathRoot(path);
        string remainder = path.Substring(rootPart.Length);

        // The separator is itself not valid inside a file name, so the
        // replacement is applied to the pieces between separators.
        char separator = Path.DirectorySeparatorChar;
        List<string> segments = new List<string>(remainder.Split(separator));

        for (int index = 0; index < segments.Count; index++)
        {
            string cleaned = segments[index];
            foreach (char bad in _BadChars)
            {
                cleaned = cleaned.Replace(bad, replaceChar);
            }
            segments[index] = cleaned;
        }

        return rootPart + Utility.Join(segments, separator.ToString());
    }

如果有任何改进能让这个函数更快、更简洁,我将不胜感激。

110374 次浏览

如果您将目录和文件名拼接在一起统一清理,而不是分别清理,代码会更简洁。至于被清理掉的":",只需检查字符串的第二个字符:如果它等于 replaceChar,就把它换回冒号。由于这个应用程序只供您自己使用,这样的解决方案应该完全够用。

我认为问题在于你一开始就对有问题的字符串调用了 Path.GetDirectoryName。如果其中包含文件名中不允许的字符,.NET 就无法判断字符串的哪些部分是目录,于是抛出异常。你必须自己做字符串比较。

假设只有文件名有问题,而不是整个路径,试试这个:

/// <summary>
/// Replaces invalid file-name characters in the last segment of
/// <paramref name="path"/> with <paramref name="replaceChar"/>; everything up to
/// and including the last directory separator is copied unchanged.
/// </summary>
/// <param name="path">Path whose file-name part is cleaned.</param>
/// <param name="replaceChar">Character substituted for each invalid character.</param>
/// <returns>The path with a cleaned file-name part.</returns>
public static string SanitizePath(string path, char replaceChar)
{
    // Fetched once: asking for the set again for every scanned character is
    // loop-invariant work.
    char[] invalidChars = Path.GetInvalidFileNameChars();

    // LastIndexOf yields -1 when there is no separator, which makes the file
    // name start at index 0.
    int filenamePos = path.LastIndexOf(Path.DirectorySeparatorChar) + 1;

    var sb = new System.Text.StringBuilder(path.Length);
    sb.Append(path, 0, filenamePos);

    for (int i = filenamePos; i < path.Length; i++)
    {
        char filenameChar = path[i];
        bool isInvalid = System.Array.IndexOf(invalidChars, filenameChar) >= 0;
        sb.Append(isInvalid ? replaceChar : filenameChar);
    }

    return sb.ToString();
}

要清理文件名,可以这样做

/// <summary>
/// Replaces every run of invalid file-name characters in <paramref name="name"/>,
/// as well as a trailing run of dots (with any invalid characters right before it),
/// by a single underscore.
/// </summary>
/// <param name="name">Candidate file name.</param>
/// <returns>The cleaned file name.</returns>
private static string MakeValidFileName( string name )
{
    char[] forbidden = System.IO.Path.GetInvalidFileNameChars();
    string charClass = System.Text.RegularExpressions.Regex.Escape( new string( forbidden ) );

    // First alternative: trailing dots; second alternative: any run of invalid characters.
    string pattern = "([" + charClass + @"]*\.+$)|([" + charClass + "]+)";

    return System.Text.RegularExpressions.Regex.Replace( name, pattern, "_" );
}
using System;
using System.IO;
using System.Linq;
using System.Text;


/// <summary>
/// Small console demo: prints a string full of problematic characters, then the
/// same string sanitized with '.' as replacement and sanitized with no replacement.
/// </summary>
public class Program
{
    /// <summary>Runs the demo and prints any exception instead of crashing.</summary>
    public static void Main()
    {
        try
        {
            var badString = "ABC\\DEF/GHI<JKL>MNO:PQR\"STU\tVWX|YZA*BCD?EFG";
            Console.WriteLine(badString);

            string replaced = SanitizeFileName(badString, '.');
            Console.WriteLine(replaced);

            string stripped = SanitizeFileName(badString);
            Console.WriteLine(stripped);
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.ToString());
        }
    }


    /// <summary>
    /// Copies <paramref name="fileName"/> while dropping every invalid file-name
    /// character, or substituting <paramref name="replacement"/> for it when one is given.
    /// </summary>
    /// <param name="fileName">Candidate file name; null and empty are returned as-is.</param>
    /// <param name="replacement">Optional substitute for invalid characters.</param>
    /// <returns>The cleaned name.</returns>
    private static string SanitizeFileName(string fileName, char? replacement = null)
    {
        // Covers both the null case (returns null) and the empty case (returns "").
        if (string.IsNullOrEmpty(fileName))
        {
            return fileName;
        }

        char[] forbidden = Path.GetInvalidFileNameChars();
        var result = new StringBuilder();

        foreach (char current in fileName)
        {
            bool isForbidden = Array.IndexOf(forbidden, current) >= 0;
            if (!isForbidden)
            {
                result.Append(current);
            }
            else if (replacement.HasValue)
            {
                result.Append(replacement.Value);
            }
        }

        return result.ToString();
    }
}

我过去在这方面取得过成功。

简洁、短小,而且是静态方法 :-)

    /// <summary>
    /// Removes from <paramref name="s"/> every character reported by
    /// Path.GetInvalidFileNameChars and then every character reported by
    /// Path.GetInvalidPathChars.
    /// </summary>
    /// <param name="s">Text to clean.</param>
    /// <returns>The text without those characters.</returns>
    public static string returnSafeString(string s)
    {
        // Same order as before: file-name characters first, path characters second.
        char[][] forbiddenGroups =
        {
            Path.GetInvalidFileNameChars(),
            Path.GetInvalidPathChars()
        };

        foreach (char[] group in forbiddenGroups)
        {
            foreach (char forbidden in group)
            {
                s = s.Replace(forbidden.ToString(), string.Empty);
            }
        }

        return s;
    }

基于Andre的精彩回答,并考虑到Spud对保留词的评论,我做出了这样的版本:

/// <summary>
/// Strip illegal chars and reserved words from a candidate filename (should not include the directory path)
/// </summary>
/// <param name="filename">Candidate file name without any directory part.</param>
/// <returns>
/// The name with each run of invalid file-name characters replaced by "_" and a
/// leading reserved device name (compared case-insensitively, with or without an
/// extension) replaced by "_reservedWord_".
/// </returns>
/// <remarks>
/// http://stackoverflow.com/questions/309485/c-sharp-sanitize-file-name
/// </remarks>
public static string CoerceValidFileName(string filename)
{
    var invalidChars = Regex.Escape(new string(Path.GetInvalidFileNameChars()));
    var invalidReStr = string.Format(@"[{0}]+", invalidChars);

    var reservedWords = new []
    {
        "CON", "PRN", "AUX", "CLOCK$", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4",
        "COM5", "COM6", "COM7", "COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4",
        "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"
    };

    var sanitisedNamePart = Regex.Replace(filename, invalidReStr, "_");

    // Each word is escaped so that the '$' in "CLOCK$" is matched literally
    // instead of acting as an end-of-input anchor.
    var escapedWords = System.Array.ConvertAll<string, string>(reservedWords, Regex.Escape);

    // The look-ahead accepts either an extension dot or the end of the name, so a
    // bare "CON" is caught as well while "CONTENT.txt" is left alone. The dot is
    // not consumed and therefore stays in the result.
    var reservedWordPattern = "^(?:" + string.Join("|", escapedWords) + @")(?=\.|$)";

    return Regex.Replace(sanitisedNamePart, reservedWordPattern, "_reservedWord_", RegexOptions.IgnoreCase);
}

这些是单元测试

// A name without illegal characters or reserved words must come back unchanged.
[Test]
public void CoerceValidFileName_SimpleValid()
{
    const string expected = "thisIsValid.txt";

    string actual = PathHelper.CoerceValidFileName(expected);

    Assert.AreEqual(expected, actual);
}


// Each run of backslashes is expected to collapse into a single underscore.
[Test]
public void CoerceValidFileName_SimpleInvalid()
{
    const string input = @"thisIsNotValid\3\\_3.txt";

    string actual = PathHelper.CoerceValidFileName(input);

    Assert.AreEqual("thisIsNotValid_3__3.txt", actual);
}


// An illegal character inside the extension is expected to be replaced too.
[Test]
public void CoerceValidFileName_InvalidExtension()
{
    const string input = @"thisIsNotValid.t\xt";

    string actual = PathHelper.CoerceValidFileName(input);

    Assert.AreEqual("thisIsNotValid.t_xt", actual);
}


// A reserved word in mixed case followed by an extension is expected to be replaced.
[Test]
public void CoerceValidFileName_KeywordInvalid()
{
    const string input = "aUx.txt";

    string actual = PathHelper.CoerceValidFileName(input);

    Assert.AreEqual("_reservedWord_.txt", actual);
}


// A name that merely starts with a reserved word is expected to stay untouched.
[Test]
public void CoerceValidFileName_KeywordValid()
{
    const string expected = "auxillary.txt";

    string actual = PathHelper.CoerceValidFileName(expected);

    Assert.AreEqual(expected, actual);
}

一个简短的解决方案:

// Split on every invalid file-name character (dropping empty pieces), glue the
// pieces back together with "_" and finally strip trailing dots.
char[] invalids = System.IO.Path.GetInvalidFileNameChars();
string[] pieces = origFileName.Split(invalids, StringSplitOptions.RemoveEmptyEntries);
var newName = String.Join("_", pieces).TrimEnd('.');
// Split on every invalid file-name character and re-join the pieces with nothing
// in between, which simply removes those characters.
string[] validPieces = dirty.Split(Path.GetInvalidFileNameChars());
string clean = String.Concat(validPieces);

我正在使用System.IO.Path.GetInvalidFileNameChars()方法检查无效字符,我没有任何问题。

我正在使用以下代码:

// Swap every invalid file-name character in filename for an underscore.
char[] invalidFileNameChars = System.IO.Path.GetInvalidFileNameChars();
for (int i = 0; i < invalidFileNameChars.Length; i++)
{
    filename = filename.Replace(invalidFileNameChars[i], '_');
}

我希望以某种方式保留字符,而不仅仅是简单地用下划线替换字符。

我想到的一种方法是用类似的字符替换这些字符(在我的情况下),不太可能被用作常规字符。所以我从无效字符列表中找到了类似字符。

下面是使用look-a-like进行编码和解码的函数。

这段代码不包括所有System.IO.Path.GetInvalidFileNameChars()字符的完整列表。因此,由您来扩展或使用下划线替换任何剩余的字符

/// <summary>
/// Builds a fresh table that maps each of the characters \ / : * ? " &lt; &gt; |
/// to a visually similar replacement character.
/// </summary>
/// <returns>A new dictionary keyed by the original character.</returns>
private static Dictionary<string, string> EncodeMapping()
{
    //-- Following characters are invalid for windows file and folder names.
    //-- \/:*?"<>|
    return new Dictionary<string, string>
    {
        { @"\", "Ì" },   // U+00CC
        { "/", "Í" },    // U+00CD
        { ":", "¦" },    // U+00A6
        { "*", "¤" },    // U+00A4
        { "?", "¿" },    // U+00BF
        { @"""", "ˮ" },  // U+02EE
        { "<", "«" },    // U+00AB
        { ">", "»" },    // U+00BB
        { "|", "│" }     // U+2502
    };
}


/// <summary>
/// Replaces every character listed in EncodeMapping() with its look-alike and
/// turns a trailing dot into "°".
/// </summary>
/// <param name="name">File or folder name to encode.</param>
/// <returns>The encoded name.</returns>
public static string Escape(string name)
{
    foreach (KeyValuePair<string, string> replace in EncodeMapping())
    {
        name = name.Replace(replace.Key, replace.Value);
    }

    //-- handle dot at the end
    // Ordinal comparison keeps the check independent of the current culture;
    // Substring replaces the CropRight helper, which is not part of the base
    // class library.
    if (name.EndsWith(".", System.StringComparison.Ordinal))
    {
        name = name.Substring(0, name.Length - 1) + "°";
    }

    return name;
}


/// <summary>
/// Reverses Escape: replaces every look-alike listed in EncodeMapping() with the
/// character it stands for and turns a trailing "°" back into a dot.
/// </summary>
/// <param name="name">Encoded file or folder name.</param>
/// <returns>The decoded name.</returns>
public static string UnEscape(string name)
{
    foreach (KeyValuePair<string, string> replace in EncodeMapping())
    {
        name = name.Replace(replace.Value, replace.Key);
    }

    //-- handle dot at the end
    // Ordinal comparison keeps the check independent of the current culture;
    // Substring replaces the CropRight helper, which is not part of the base
    // class library.
    if (name.EndsWith("°", System.StringComparison.Ordinal))
    {
        name = name.Substring(0, name.Length - 1) + ".";
    }

    return name;
}

你可以选择自己喜欢的相似字符。我使用 Windows 自带的字符映射表程序(%windir%\system32\charmap.exe)来挑选。

当我发现需要调整的地方时,我会更新这段代码。

下面是一个基于Andre代码的高效的惰性加载扩展方法:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;


namespace LT
{
    /// <summary>String helpers for producing safe file names.</summary>
    public static class Utility
    {
        // Built on first use and shared afterwards. Lazy<T> makes the one-time
        // initialisation thread-safe, and keeping the Regex instance itself
        // avoids re-resolving the pattern on every call.
        private static readonly Lazy<System.Text.RegularExpressions.Regex> invalidFileNameRegex =
            new Lazy<System.Text.RegularExpressions.Regex>(BuildInvalidFileNameRegex);


        /// <summary>
        /// Replaces every run of invalid file-name characters in <paramref name="name"/>,
        /// as well as a trailing run of dots (with any invalid characters right before it),
        /// by a single underscore.
        /// </summary>
        /// <param name="name">Candidate file name.</param>
        /// <returns>The cleaned file name.</returns>
        public static string MakeValidFileName(this string name)
        {
            return invalidFileNameRegex.Value.Replace(name, "_");
        }


        // Builds the expression from the invalid file-name characters of the current platform.
        private static System.Text.RegularExpressions.Regex BuildInvalidFileNameRegex()
        {
            var invalidChars = System.Text.RegularExpressions.Regex.Escape(new string(System.IO.Path.GetInvalidFileNameChars()));
            var pattern = string.Format(@"([{0}]*\.+$)|([{0}]+)", invalidChars);
            return new System.Text.RegularExpressions.Regex(pattern);
        }
    }
}

这里有很多可行的解决方案。为了完整起见,这里有一个不使用regex,而是使用LINQ的方法:

// Fold over the invalid file-name characters, replacing each one in turn with '_'.
var invalids = Path.GetInvalidFileNameChars();
filename = invalids.Aggregate(
    filename,
    (sanitized, invalidChar) => sanitized.Replace(invalidChar, '_'));

而且,这是一个非常简短的解决方案;)

基于 @fiat 和 @Andre 的方法,我也想分享我的解决方案。主要区别:
  • 这是一种扩展方法
  • Regex在第一次使用时进行编译,以节省大量执行的时间
  • 保留词被保留
/// <summary>
/// Extension methods that turn arbitrary text into a usable file name.
/// </summary>
public static class StringPathExtensions
{
    // Compiled once when the type is first used and shared by every call.
    private static readonly Regex _invalidPathPartsRegex = BuildInvalidPathPartsRegex();


    // Builds the combined expression:
    //  - "reserved": a reserved device name at the very start, but only when it is
    //    the whole base name (followed by '.' or the end), so "CONTENT.txt" is not
    //    mistaken for "CON";
    //  - "invalid": a run of invalid file-name characters or ':', or one trailing dot.
    private static Regex BuildInvalidPathPartsRegex()
    {
        var invalidReg = System.Text.RegularExpressions.Regex.Escape(new string(Path.GetInvalidFileNameChars()));
        var pattern =
            @"(?<reserved>^(?:CON|PRN|AUX|CLOCK\$|NUL|COM[0-9]|LPT[0-9])(?=\.|$))" +
            "|(?<invalid>[" + invalidReg + @":]+|\.$)";

        // Matching ignores case so that "con.txt" is caught as well as "CON.txt".
        return new Regex(pattern, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    }


    /// <summary>
    /// Prefixes a leading reserved device name with "_" (keeping the word itself)
    /// and replaces each invalid part of <paramref name="path"/> with "_".
    /// </summary>
    /// <param name="path">Candidate file name.</param>
    /// <returns>The sanitized file name.</returns>
    public static string SanitizeFileName(this string path)
    {
        return _invalidPathPartsRegex.Replace(path, m =>
        {
            if (m.Groups["reserved"].Success)
            {
                return string.Concat("_", m.Groups["reserved"].Value);
            }

            return "_";
        });
    }
}