如何计算子字符串的出现次数?

假设我有这样一个字符串:

MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";

然后我想知道在这个字符串中子字符串“ OU =”出现了多少次。使用单个字符,可能会有这样的东西:

int count = MyString.Split("OU=").Length - 1;

但是 Split只适用于 char,而不适用于 string

还有如何找到 n 个出现的位置? 例如,字符串中第2个 "OU="的位置?

如何解决这个问题?

115514 次浏览
Regex.Matches(input, "OU=").Count
int count = myString.Split(new []{','})
.Count(item => item.StartsWith(
"OU=", StringComparison.OrdinalIgnoreCase))

below should work

  MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
int count = Regex.Matches(MyString, "OU=").Count

You can find all the occurrences and their positions with IndexOf:

string MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
string stringToFind = "OU=";


List<int> positions = new List<int>();
int pos = 0;
while ((pos < MyString.Length) && (pos = MyString.IndexOf(stringToFind, pos)) != -1)
{
positions.Add(pos);
pos += stringToFind.Length();
}


Console.WriteLine("{0} occurrences", positions.Count);
foreach (var p in positions)
{
Console.WriteLine(p);
}

You can get the same result from a regular expression:

var matches = Regex.Matches(MyString, "OU=");
Console.WriteLine("{0} occurrences", matches.Count);
foreach (var m in matches)
{
Console.WriteLine(m.Index);
}

The primary differences:

  • The Regex code is shorter
  • The Regex code allocates a collection and multiple strings.
  • The IndexOf code could be written to output the position immediately, without creating a collection.
  • It's likely that the Regex code will be faster in isolation, but if used many times the combined overhead of the string allocations could cause a much higher load on the garbage collector.

If I were writing this in-line, as something that didn't get used often, I'd probably go with the regex solution. If I were to put it into a library as something to be used a lot, I'd probably go with the IndexOf solution.

(Clippy-mode:ON)

You look like you're parsing an LDAP query!

Would you like to parse it:

  • manually? Goto "SplittingAndParsing"
  • Automagically via Win32 calls? Goto "Using Win32 via PInvoke"

(Clippy-mode:OFF)

"SplittingAndParsing":

var MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
var chunksAsKvps = MyString
.Split(',')
.Select(chunk =>
{
var bits = chunk.Split('=');
return new KeyValuePair<string,string>(bits[0], bits[1]);
});


var allOUs = chunksAsKvps
.Where(kvp => kvp.Key.Equals("OU", StringComparison.OrdinalIgnoreCase));

"Using Win32 via PInvoke":

Usage:

var parsedDn = Win32LDAP.ParseDN(str);
var allOUs2 = parsedDn
.Where(dn => dn.Key.Equals("OU", StringComparison.OrdinalIgnoreCase));

Utility Code:

// I don't remember where I got this from, honestly...I *think* it came
// from another SO user long ago, but those details I've lost to history...
public class Win32LDAP
{
#region Constants
public const int ERROR_SUCCESS = 0;
public const int ERROR_BUFFER_OVERFLOW = 111;
#endregion Constants


#region DN Parsing
[DllImport("ntdsapi.dll", CharSet = CharSet.Unicode)]
protected static extern int DsGetRdnW(
ref IntPtr ppDN,
ref int pcDN,
out IntPtr ppKey,
out int pcKey,
out IntPtr ppVal,
out int pcVal
);


public static KeyValuePair<string, string> GetName(string distinguishedName)
{
IntPtr pDistinguishedName = Marshal.StringToHGlobalUni(distinguishedName);
try
{
IntPtr pDN = pDistinguishedName, pKey, pVal;
int cDN = distinguishedName.Length, cKey, cVal;


int lastError = DsGetRdnW(ref pDN, ref cDN, out pKey, out cKey, out pVal, out cVal);


if(lastError == ERROR_SUCCESS)
{
string key, value;


if(cKey < 1)
{
key = string.Empty;
}
else
{
key = Marshal.PtrToStringUni(pKey, cKey);
}


if(cVal < 1)
{
value = string.Empty;
}
else
{
value = Marshal.PtrToStringUni(pVal, cVal);
}


return new KeyValuePair<string, string>(key, value);
}
else
{
throw new Win32Exception(lastError);
}
}
finally
{
Marshal.FreeHGlobal(pDistinguishedName);
}
}


public static IEnumerable<KeyValuePair<string, string>> ParseDN(string distinguishedName)
{
List<KeyValuePair<string, string>> components = new List<KeyValuePair<string, string>>();
IntPtr pDistinguishedName = Marshal.StringToHGlobalUni(distinguishedName);
try
{
IntPtr pDN = pDistinguishedName, pKey, pVal;
int cDN = distinguishedName.Length, cKey, cVal;


do
{
int lastError = DsGetRdnW(ref pDN, ref cDN, out pKey, out cKey, out pVal, out cVal);


if(lastError == ERROR_SUCCESS)
{
string key, value;


if(cKey < 0)
{
key = null;
}
else if(cKey == 0)
{
key = string.Empty;
}
else
{
key = Marshal.PtrToStringUni(pKey, cKey);
}


if(cVal < 0)
{
value = null;
}
else if(cVal == 0)
{
value = string.Empty;
}
else
{
value = Marshal.PtrToStringUni(pVal, cVal);
}


components.Add(new KeyValuePair<string, string>(key, value));


pDN = (IntPtr)(pDN.ToInt64() + UnicodeEncoding.CharSize); //skip over comma
cDN--;
}
else
{
throw new Win32Exception(lastError);
}
} while(cDN > 0);


return components;
}
finally
{
Marshal.FreeHGlobal(pDistinguishedName);
}
}


[DllImport("ntdsapi.dll", CharSet = CharSet.Unicode)]
protected static extern int DsQuoteRdnValueW(
int cUnquotedRdnValueLength,
string psUnquotedRdnValue,
ref int pcQuotedRdnValueLength,
IntPtr psQuotedRdnValue
);


public static string QuoteRDN(string rdn)
{
if (rdn == null) return null;


int initialLength = rdn.Length;
int quotedLength = 0;
IntPtr pQuotedRDN = IntPtr.Zero;


int lastError = DsQuoteRdnValueW(initialLength, rdn, ref quotedLength, pQuotedRDN);


switch (lastError)
{
case ERROR_SUCCESS:
{
return string.Empty;
}
case ERROR_BUFFER_OVERFLOW:
{
break; //continue
}
default:
{
throw new Win32Exception(lastError);
}
}


pQuotedRDN = Marshal.AllocHGlobal(quotedLength * UnicodeEncoding.CharSize);


try
{
lastError = DsQuoteRdnValueW(initialLength, rdn, ref quotedLength, pQuotedRDN);


switch(lastError)
{
case ERROR_SUCCESS:
{
return Marshal.PtrToStringUni(pQuotedRDN, quotedLength);
}
default:
{
throw new Win32Exception(lastError);
}
}
}
finally
{
if(pQuotedRDN != IntPtr.Zero)
{
Marshal.FreeHGlobal(pQuotedRDN);
}
}
}




[DllImport("ntdsapi.dll", CharSet = CharSet.Unicode)]
protected static extern int DsUnquoteRdnValueW(
int cQuotedRdnValueLength,
string psQuotedRdnValue,
ref int pcUnquotedRdnValueLength,
IntPtr psUnquotedRdnValue
);


public static string UnquoteRDN(string rdn)
{
if (rdn == null) return null;


int initialLength = rdn.Length;
int unquotedLength = 0;
IntPtr pUnquotedRDN = IntPtr.Zero;


int lastError = DsUnquoteRdnValueW(initialLength, rdn, ref unquotedLength, pUnquotedRDN);


switch (lastError)
{
case ERROR_SUCCESS:
{
return string.Empty;
}
case ERROR_BUFFER_OVERFLOW:
{
break; //continue
}
default:
{
throw new Win32Exception(lastError);
}
}


pUnquotedRDN = Marshal.AllocHGlobal(unquotedLength * UnicodeEncoding.CharSize);


try
{
lastError = DsUnquoteRdnValueW(initialLength, rdn, ref unquotedLength, pUnquotedRDN);


switch(lastError)
{
case ERROR_SUCCESS:
{
return Marshal.PtrToStringUni(pUnquotedRDN, unquotedLength);
}
default:
{
throw new Win32Exception(lastError);
}
}
}
finally
{
if(pUnquotedRDN != IntPtr.Zero)
{
Marshal.FreeHGlobal(pUnquotedRDN);
}
}
}
#endregion DN Parsing
}


public class DNComponent
{
public string Type { get; protected set; }
public string EscapedValue { get; protected set; }
public string UnescapedValue { get; protected set; }
public string WholeComponent { get; protected set; }


public DNComponent(string component, bool isEscaped)
{
string[] tokens = component.Split(new char[] { '=' }, 2);
setup(tokens[0], tokens[1], isEscaped);
}


public DNComponent(string key, string value, bool isEscaped)
{
setup(key, value, isEscaped);
}


private void setup(string key, string value, bool isEscaped)
{
Type = key;


if(isEscaped)
{
EscapedValue = value;
UnescapedValue = Win32LDAP.UnquoteRDN(value);
}
else
{
EscapedValue = Win32LDAP.QuoteRDN(value);
UnescapedValue = value;
}


WholeComponent = Type + "=" + EscapedValue;
}


public override bool Equals(object obj)
{
if (obj is DNComponent)
{
DNComponent dnObj = (DNComponent)obj;
return dnObj.WholeComponent.Equals(this.WholeComponent, StringComparison.CurrentCultureIgnoreCase);
}
return base.Equals(obj);
}


public override int GetHashCode()
{
return WholeComponent.GetHashCode();
}
}


public class DistinguishedName
{
public DNComponent[] Components
{
get
{
return components.ToArray();
}
}


private List<DNComponent> components;
private string cachedDN;


public DistinguishedName(string distinguishedName)
{
cachedDN = distinguishedName;
components = new List<DNComponent>();
foreach (KeyValuePair<string, string> kvp in Win32LDAP.ParseDN(distinguishedName))
{
components.Add(new DNComponent(kvp.Key, kvp.Value, true));
}
}


public DistinguishedName(IEnumerable<DNComponent> dnComponents)
{
components = new List<DNComponent>(dnComponents);
cachedDN = GetWholePath(",");
}


public bool Contains(DNComponent dnComponent)
{
return components.Contains(dnComponent);
}


public string GetDNSDomainName()
{
List<string> dcs = new List<string>();
foreach (DNComponent dnc in components)
{
if(dnc.Type.Equals("DC", StringComparison.CurrentCultureIgnoreCase))
{
dcs.Add(dnc.UnescapedValue);
}
}
return string.Join(".", dcs.ToArray());
}


public string GetDomainDN()
{
List<string> dcs = new List<string>();
foreach (DNComponent dnc in components)
{
if(dnc.Type.Equals("DC", StringComparison.CurrentCultureIgnoreCase))
{
dcs.Add(dnc.WholeComponent);
}
}
return string.Join(",", dcs.ToArray());
}


public string GetWholePath()
{
return GetWholePath(",");
}


public string GetWholePath(string separator)
{
List<string> parts = new List<string>();
foreach (DNComponent component in components)
{
parts.Add(component.WholeComponent);
}
return string.Join(separator, parts.ToArray());
}


public DistinguishedName GetParent()
{
if(components.Count == 1)
{
return null;
}
List<DNComponent> tempList = new List<DNComponent>(components);
tempList.RemoveAt(0);
return new DistinguishedName(tempList);
}


public override bool Equals(object obj)
{
if(obj is DistinguishedName)
{
DistinguishedName objDN = (DistinguishedName)obj;
if (this.Components.Length == objDN.Components.Length)
{
for (int i = 0; i < this.Components.Length; i++)
{
if (!this.Components[i].Equals(objDN.Components[i]))
{
return false;
}
}
return true;
}
return false;
}
return base.Equals(obj);
}


public override int GetHashCode()
{
return cachedDN.GetHashCode();
}
}

Here are two examples of how you can get the results that you are looking for

var MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";

This one you would see a list of the values separated but it would have DC just an idea to show that the split with String does work`

var split = MyString.Split(new string[] { "OU=", "," }, StringSplitOptions.RemoveEmptyEntries);

This one will Split and return you only the 3 items into a List so that if you don't rely on a count you can visually validate that it returns the 3 Levels of `OU=``

var lstSplit = MyString.Split(new[] { ',' })
.Where(splitItem => splitItem.StartsWith(
"OU=", StringComparison.OrdinalIgnoreCase)).ToList();

this extension needs less resources than regualr expressions.

public static int CountSubstring(this string text, string value)
{
int count = 0, minIndex = text.IndexOf(value, 0);
while (minIndex != -1)
{
minIndex = text.IndexOf(value, minIndex + value.Length);
count++;
}
return count;
}

usage:

MyString = "OU=Level3,OU=Level2,OU=Level1,DC=domain,DC=com";
int count = MyString.CountSubstring("OU=");
public static int CountOccurences(string needle, string haystack)
{
return (haystack.Length - haystack.Replace(needle, "").Length) / needle.Length;
}

Benchmarked it against other answers here (the regex one and the "IndexOf" one), works faster.