C # 的压缩/解压缩字符串

我是 .NET 新手。我正在用 C# 对字符串进行压缩和解压缩。我有一个 XML,先把它转换成字符串,然后进行压缩和解压缩。我的代码没有编译错误,但是当我解压缩并返回字符串时,它只返回了一半的 XML。

下面是我的代码,请纠正我的错误。

代码:

/// <summary>
/// Demonstrates GZip-compressing a string into another string and restoring it.
/// </summary>
/// <remarks>
/// The compressed payload is binary. It is carried in a <see cref="string"/> using one
/// char per compressed byte (char code == byte value), exactly as the original code did.
/// The text itself is converted to bytes with UTF-8 so that characters above U+00FF
/// survive the round trip; the original <c>(byte)char</c> cast silently truncated them.
/// </remarks>
class Program
{
    /// <summary>
    /// Compresses <paramref name="value"/> with GZip.
    /// </summary>
    /// <param name="value">The text to compress.</param>
    /// <returns>A string holding the compressed bytes, one char per byte.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="value"/> is null.</exception>
    public static string Zip(string value)
    {
        if (value == null)
        {
            throw new System.ArgumentNullException("value");
        }

        byte[] textBytes = System.Text.Encoding.UTF8.GetBytes(value);

        using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
        {
            using (System.IO.Compression.GZipStream sw =
                new System.IO.Compression.GZipStream(ms, System.IO.Compression.CompressionMode.Compress))
            {
                sw.Write(textBytes, 0, textBytes.Length);
            }

            // The GZipStream must be disposed before the buffer is read: only then has it
            // written the final compressed block and the GZip trailer.
            return BytesToBinaryString(ms.ToArray());
        }
    }

    /// <summary>
    /// Restores the text that was compressed by <see cref="Zip"/>.
    /// </summary>
    /// <param name="value">A string produced by <see cref="Zip"/> (one char per compressed byte).</param>
    /// <returns>The complete decompressed text.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="value"/> is null.</exception>
    public static string UnZip(string value)
    {
        if (value == null)
        {
            throw new System.ArgumentNullException("value");
        }

        byte[] compressed = BinaryStringToBytes(value);

        using (System.IO.MemoryStream ms = new System.IO.MemoryStream(compressed))
        using (System.IO.Compression.GZipStream sr =
            new System.IO.Compression.GZipStream(ms, System.IO.Compression.CompressionMode.Decompress))
        using (System.IO.MemoryStream result = new System.IO.MemoryStream())
        {
            // The decompressed size is unknown up front (and normally larger than the
            // compressed size), and a single Read call may return fewer bytes than asked for.
            // Keep reading until the stream reports end-of-data.
            byte[] chunk = new byte[4096];
            int read;
            while ((read = sr.Read(chunk, 0, chunk.Length)) > 0)
            {
                result.Write(chunk, 0, read);
            }

            return System.Text.Encoding.UTF8.GetString(result.ToArray());
        }
    }

    // Maps every byte to the char with the same numeric value.
    private static string BytesToBinaryString(byte[] bytes)
    {
        System.Text.StringBuilder sB = new System.Text.StringBuilder(bytes.Length);
        foreach (byte item in bytes)
        {
            sB.Append((char)item);
        }
        return sB.ToString();
    }

    // Inverse of BytesToBinaryString: maps every char back to the byte with the same value.
    private static byte[] BinaryStringToBytes(string value)
    {
        byte[] bytes = new byte[value.Length];
        for (int i = 0; i < value.Length; i++)
        {
            bytes[i] = (byte)value[i];
        }
        return bytes;
    }

    static void Main(string[] args)
    {
        XDocument doc = XDocument.Load(@"D:\RSP.xml");
        string val = doc.ToString(SaveOptions.DisableFormatting);
        val = Zip(val);
        val = UnZip(val);
    }
}

我的 XML 大小是63KB。

279572 次浏览

压缩/解压缩字符串的代码

/// <summary>
/// Pumps data from <paramref name="src"/> into <paramref name="dest"/> in chunks of up to
/// 4096 bytes until <paramref name="src"/> reports that no more data is available.
/// </summary>
/// <param name="src">Stream to read from.</param>
/// <param name="dest">Stream to write to.</param>
public static void CopyTo(Stream src, Stream dest) {
    byte[] chunk = new byte[4096];

    while (true) {
        int read = src.Read(chunk, 0, chunk.Length);
        if (read == 0) {
            // A zero-length read marks the end of the source stream.
            return;
        }

        dest.Write(chunk, 0, read);
    }
}


/// <summary>
/// Encodes <paramref name="str"/> as UTF-8 and GZip-compresses the resulting bytes.
/// </summary>
/// <param name="str">The text to compress.</param>
/// <returns>The compressed bytes.</returns>
public static byte[] Zip(string str) {
    byte[] utf8Text = Encoding.UTF8.GetBytes(str);

    using (MemoryStream plain = new MemoryStream(utf8Text))
    using (MemoryStream compressed = new MemoryStream()) {
        using (GZipStream gzip = new GZipStream(compressed, CompressionMode.Compress)) {
            CopyTo(plain, gzip);
        }

        // Read the buffer only after the GZipStream has been disposed, so that all
        // compressed data has been written to it.
        byte[] result = compressed.ToArray();
        return result;
    }
}


/// <summary>
/// GZip-decompresses <paramref name="bytes"/> and decodes the result as UTF-8 text.
/// </summary>
/// <param name="bytes">GZip-compressed data.</param>
/// <returns>The decompressed text.</returns>
public static string Unzip(byte[] bytes) {
    using (MemoryStream compressed = new MemoryStream(bytes))
    using (MemoryStream plain = new MemoryStream()) {
        using (GZipStream gzip = new GZipStream(compressed, CompressionMode.Decompress)) {
            CopyTo(gzip, plain);
        }

        byte[] utf8Text = plain.ToArray();
        return Encoding.UTF8.GetString(utf8Text);
    }
}


/// <summary>
/// Sample entry point: compresses a repetitive string and decompresses it again.
/// </summary>
static void Main(string[] args) {
    const string sample = "StringStringStringStringStringStringStringStringStringStringStringStringStringString";

    byte[] r1 = Zip(sample);
    string r2 = Unzip(r1);
}

请记住,Zip 返回 byte[],而 Unzip 返回 string。如果你想从 Zip 得到一个字符串,可以对结果进行 Base64 编码(例如使用 Convert.ToBase64String(r1))。Zip 的结果是纯二进制数据,不能直接打印到屏幕上,也不能直接写入 XML。

上面给出的版本适用于 .NET 2.0;在 .NET 4.0 中请改用 MemoryStream.CopyTo。

重要提示:在 GZipStream 知道它已经拿到全部输入之前,压缩后的内容不会完整写入输出流(也就是说,要有效地压缩,它需要所有的数据)。因此在检查输出流(例如 mso.ToArray())之前,必须先对 GZipStream 调用 Dispose()。上面的 using() { } 块正是为此而写的。请注意,GZipStream 位于最内层的块中,而内容是在该块之外访问的。解压缩时也一样:在尝试访问数据之前,先对 GZipStream 调用 Dispose()。

根据 这个片段 我用这个代码,它工作得很好:

using System;
using System.IO;
using System.IO.Compression;
using System.Text;


namespace CompressString
{
    /// <summary>
    /// Compresses strings to, and restores them from, a Base64 text format.
    /// </summary>
    /// <remarks>
    /// Decoded from Base64, the payload is a 4-byte length prefix (the UTF-8 byte count of the
    /// original text, as written by <see cref="BitConverter.GetBytes(int)"/>) followed by the
    /// GZip-compressed UTF-8 bytes.
    /// </remarks>
    internal static class StringCompressor
    {
        // Size in bytes of the uncompressed-length prefix stored in front of the GZip data.
        private const int LengthPrefixSize = 4;

        /// <summary>
        /// Compresses the string.
        /// </summary>
        /// <param name="text">The text.</param>
        /// <returns>The Base64 encoding of the length prefix followed by the GZip data.</returns>
        public static string CompressString(string text)
        {
            byte[] buffer = Encoding.UTF8.GetBytes(text);

            byte[] compressedData;
            using (var memoryStream = new MemoryStream())
            {
                // leaveOpen: true keeps memoryStream usable after the GZipStream is disposed.
                using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Compress, true))
                {
                    gZipStream.Write(buffer, 0, buffer.Length);
                }

                // Taken only after the GZipStream was disposed, i.e. after all compressed
                // data has been written into memoryStream.
                compressedData = memoryStream.ToArray();
            }

            var gZipBuffer = new byte[compressedData.Length + LengthPrefixSize];
            Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gZipBuffer, 0, LengthPrefixSize);
            Buffer.BlockCopy(compressedData, 0, gZipBuffer, LengthPrefixSize, compressedData.Length);
            return Convert.ToBase64String(gZipBuffer);
        }

        /// <summary>
        /// Decompresses the string.
        /// </summary>
        /// <param name="compressedText">The compressed text, as produced by <see cref="CompressString"/>.</param>
        /// <returns>The original text.</returns>
        public static string DecompressString(string compressedText)
        {
            byte[] gZipBuffer = Convert.FromBase64String(compressedText);
            int dataLength = BitConverter.ToInt32(gZipBuffer, 0);
            var buffer = new byte[dataLength];

            using (var memoryStream = new MemoryStream(gZipBuffer, LengthPrefixSize, gZipBuffer.Length - LengthPrefixSize))
            using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
            {
                // A single Read may return fewer bytes than requested, so keep reading until
                // the buffer is full or the stream ends.
                int total = 0;
                while (total < buffer.Length)
                {
                    int read = gZipStream.Read(buffer, total, buffer.Length - total);
                    if (read == 0)
                    {
                        break;
                    }

                    total += read;
                }

                return Encoding.UTF8.GetString(buffer, 0, total);
            }
        }
    }
}

对于那些仍然遇到错误“GZip 头部的幻数(magic number)不正确。请确保您传入的是 GZip 流。”的人:如果你的字符串不是由 .NET 的 GZipStream 压缩的(数据开头缺少 GZip 头部),你需要这样做:

       /// <summary>
       /// Decodes a Base64 string, puts eight fixed GZip header bytes in front of the decoded
       /// data, decompresses the combined bytes with <see cref="GZipStream"/> and returns the
       /// result as text (read with <see cref="StreamReader"/>'s default encoding).
       /// </summary>
       /// <param name="originalReceivedSrc">Base64-encoded compressed data.</param>
       /// <returns>The decompressed text.</returns>
       public static string decodeDecompress(string originalReceivedSrc) {
           byte[] payload = Convert.FromBase64String(originalReceivedSrc);

           // The trick: the data starts with the GZip magic number (0x1f 0x8b) and the
           // deflate method id (0x08), followed by five zero bytes.
           byte[] gzipPrefix = { 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 };

           using (MemoryStream assembled = new MemoryStream()) {
               assembled.Write(gzipPrefix, 0, 8);
               assembled.Write(payload, 0, payload.Length);

               // Rewind so decompression starts at the injected header.
               assembled.Position = 0;

               using (GZipStream inflater = new GZipStream(assembled, CompressionMode.Decompress))
               using (StreamReader textReader = new StreamReader(inflater)) {
                   string text = textReader.ReadToEnd();
                   return text;
               }
           }
       }

随着 .NET 4.0(及更高版本)中 Stream.CopyTo() 方法的出现,我想我应该发布一个更新的做法。

我还认为下面的版本作为一个自包含类的清晰示例非常有用,该类用于将常规字符串压缩为 Base64编码的字符串,反之亦然:

/// <summary>
/// Converts between plain strings and Base64 strings holding Deflate-compressed UTF-8 text.
/// </summary>
public static class StringCompression
{
    /// <summary>
    /// Deflate-compresses the UTF-8 bytes of a string and returns them Base64 encoded.
    /// </summary>
    /// <param name="uncompressedString">String to compress</param>
    /// <returns>The Base64 encoding of the compressed bytes.</returns>
    public static string Compress(string uncompressedString)
    {
        using (MemoryStream plain = new MemoryStream(Encoding.UTF8.GetBytes(uncompressedString)))
        using (MemoryStream packed = new MemoryStream())
        {
            // leaveOpen is true so that disposing the DeflateStream flushes its data into
            // 'packed' without closing it; ToArray() is then called on a stream that is
            // still open.
            using (DeflateStream deflater = new DeflateStream(packed, CompressionLevel.Fastest, true))
            {
                plain.CopyTo(deflater);
            }

            return Convert.ToBase64String(packed.ToArray());
        }
    }

    /// <summary>
    /// Reverses <see cref="Compress"/>: Base64-decodes the input, inflates it and decodes
    /// the result as UTF-8.
    /// </summary>
    /// <param name="compressedString">String to decompress.</param>
    /// <returns>The uncompressed string.</returns>
    public static string Decompress(string compressedString)
    {
        byte[] packedBytes = Convert.FromBase64String(compressedString);
        MemoryStream packed = new MemoryStream(packedBytes);

        // The DeflateStream is created without leaveOpen, so disposing it also closes 'packed'.
        using (DeflateStream inflater = new DeflateStream(packed, CompressionMode.Decompress))
        using (MemoryStream plain = new MemoryStream())
        {
            inflater.CopyTo(plain);
            return Encoding.UTF8.GetString(plain.ToArray());
        }
    }
}

下面是另一种使用扩展方法技术来扩展 String 类以添加字符串压缩和解压缩的方法。您可以将下面的类放到一个现有的项目中,然后使用:

// Usage example: Compress() is called as an extension method directly on the string.
var uncompressedString = "Hello World!";
var compressedString = uncompressedString.Compress();

还有

// Usage example: Decompress() is called as an extension method on the compressed string.
var decompressedString = compressedString.Decompress();

也就是说:

/// <summary>
/// String extension methods for Deflate compression with Base64 transport encoding.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Compresses the string's UTF-8 bytes with Deflate and Base64-encodes the result.
    /// </summary>
    /// <param name="uncompressedString">String to compress</param>
    /// <returns>Base64 text containing the compressed bytes.</returns>
    public static string Compress(this string uncompressedString)
    {
        byte[] deflated;

        using (var input = new MemoryStream(Encoding.UTF8.GetBytes(uncompressedString)))
        using (var output = new MemoryStream())
        {
            // Passing leaveOpen = true keeps 'output' open when the compressor is disposed,
            // so the compressor can flush into it and ToArray() runs on an open stream.
            var compressor = new DeflateStream(output, CompressionLevel.Fastest, true);
            try
            {
                input.CopyTo(compressor);
            }
            finally
            {
                compressor.Dispose();
            }

            // Only now, with the compressor disposed, is the output complete.
            deflated = output.ToArray();
        }

        return Convert.ToBase64String(deflated);
    }

    /// <summary>
    /// Base64-decodes the string, inflates the bytes with Deflate and decodes them as UTF-8.
    /// </summary>
    /// <param name="compressedString">String to decompress.</param>
    /// <returns>The uncompressed string.</returns>
    public static string Decompress(this string compressedString)
    {
        var input = new MemoryStream(Convert.FromBase64String(compressedString));
        byte[] inflated;

        using (var decompressor = new DeflateStream(input, CompressionMode.Decompress))
        using (var output = new MemoryStream())
        {
            decompressor.CopyTo(output);
            inflated = output.ToArray();
        }

        return Encoding.UTF8.GetString(inflated);
    }
}

这是一个针对.NET 4.5和更新版本的更新版本,使用了异步/等待和 IEnumerables:

/// <summary>
/// Extension methods that serialize an object and GZip-compress the bytes, and back.
/// </summary>
public static class CompressionExtensions
{
    /// <summary>
    /// Serializes <paramref name="obj"/> with the <c>Serialize</c> extension method and
    /// GZip-compresses the resulting bytes.
    /// </summary>
    /// <param name="obj">The object to serialize and compress.</param>
    /// <returns>The compressed bytes.</returns>
    public static async Task<IEnumerable<byte>> Zip(this object obj)
    {
        byte[] bytes = obj.Serialize();

        using (MemoryStream msi = new MemoryStream(bytes))
        using (MemoryStream mso = new MemoryStream())
        {
            using (var gs = new GZipStream(mso, CompressionMode.Compress))
            {
                await msi.CopyToAsync(gs);
            }

            // Read the output only after the GZipStream has been disposed, so that all
            // compressed data has been written to it.
            return mso.ToArray().AsEnumerable();
        }
    }

    /// <summary>
    /// GZip-decompresses <paramref name="bytes"/> and deserializes the result with the
    /// <c>Deserialize</c> extension method.
    /// </summary>
    /// <param name="bytes">GZip-compressed serialized data.</param>
    /// <returns>The deserialized object.</returns>
    public static async Task<object> Unzip(this byte[] bytes)
    {
        using (MemoryStream msi = new MemoryStream(bytes))
        using (MemoryStream mso = new MemoryStream())
        {
            using (var gs = new GZipStream(msi, CompressionMode.Decompress))
            {
                await gs.CopyToAsync(mso);
            }

            // Deserialize() is asynchronous. Without this await the returned task would
            // itself become the result object handed to the caller.
            return await mso.ToArray().Deserialize();
        }
    }
}


/// <summary>
/// Extension methods that convert objects to and from byte arrays using
/// <see cref="BinaryFormatter"/>.
/// </summary>
/// <remarks>
/// SECURITY NOTE (review): BinaryFormatter is unsafe for untrusted input and is obsolete in
/// current .NET versions. It is kept here because replacing it would change the serialized
/// format; only deserialize data from a trusted source.
/// </remarks>
public static class SerializerExtensions
{
    /// <summary>
    /// Serializes <paramref name="objectToWrite"/> into a byte array.
    /// </summary>
    /// <param name="objectToWrite">The object to serialize.</param>
    /// <returns>Exactly the bytes written by the formatter.</returns>
    public static byte[] Serialize<T>(this T objectToWrite)
    {
        using (MemoryStream stream = new MemoryStream())
        {
            BinaryFormatter binaryFormatter = new BinaryFormatter();
            binaryFormatter.Serialize(stream, objectToWrite);

            // ToArray() copies only the bytes actually written. GetBuffer() would return the
            // whole internal buffer, including unused capacity as trailing zero bytes.
            return stream.ToArray();
        }
    }

    /// <summary>
    /// Deserializes <paramref name="arr"/> and casts the result to <typeparamref name="T"/>.
    /// </summary>
    /// <param name="arr">Bytes produced by <see cref="Serialize{T}"/>.</param>
    /// <returns>The deserialized object.</returns>
    public static async Task<T> _Deserialize<T>(this byte[] arr)
    {
        using (MemoryStream stream = new MemoryStream())
        {
            BinaryFormatter binaryFormatter = new BinaryFormatter();
            await stream.WriteAsync(arr, 0, arr.Length);

            // Rewind so the formatter reads from the first byte.
            stream.Position = 0;

            return (T)binaryFormatter.Deserialize(stream);
        }
    }

    /// <summary>
    /// Deserializes <paramref name="arr"/> into an <see cref="object"/>.
    /// </summary>
    /// <param name="arr">Bytes produced by <see cref="Serialize{T}"/>.</param>
    /// <returns>The deserialized object.</returns>
    public static async Task<object> Deserialize(this byte[] arr)
    {
        object obj = await arr._Deserialize<object>();
        return obj;
    }
}

通过这种方式,您可以序列化 BinaryFormatter支持的所有内容,而不仅仅是字符串。

编辑:

如果你需要处理 Encoding,可以直接使用 Convert.ToBase64String(byte[])(参见 https://learn.microsoft.com/es-es/dotnet/api/system.convert.tobase64string?view=netframework-4.8#System_Convert_ToBase64String_System_Byte___ )。

如果你需要一个例子,看看这个答案!

我最喜欢@fubo 的回答,但我觉得这个更优雅。

这种方法更加兼容,因为它不会手动预先存储长度。

此外,我还公开了一些扩展,以支持字符串到字符串、字节[]到字节[]和流到流的压缩。

/// <summary>
/// GZip helpers exposed as extension methods for strings, byte arrays and streams.
/// </summary>
public static class ZipExtensions
{
    /// <summary>
    /// GZip-compresses the UTF-8 bytes of <paramref name="data"/> and Base64-encodes them.
    /// </summary>
    /// <param name="data">The text to compress.</param>
    /// <returns>Base64 text containing the compressed bytes.</returns>
    public static string CompressToBase64(this string data)
    {
        byte[] plain = Encoding.UTF8.GetBytes(data);
        byte[] packed = plain.Compress();
        return Convert.ToBase64String(packed);
    }

    /// <summary>
    /// Base64-decodes <paramref name="data"/>, GZip-decompresses it and decodes the bytes as UTF-8.
    /// </summary>
    /// <param name="data">Base64 text produced by <see cref="CompressToBase64"/>.</param>
    /// <returns>The decompressed text.</returns>
    public static string DecompressFromBase64(this string data)
    {
        byte[] packed = Convert.FromBase64String(data);
        byte[] plain = packed.Decompress();
        return Encoding.UTF8.GetString(plain);
    }

    /// <summary>
    /// GZip-compresses a byte array.
    /// </summary>
    /// <param name="data">The bytes to compress.</param>
    /// <returns>The compressed bytes.</returns>
    public static byte[] Compress(this byte[] data)
    {
        using (MemoryStream input = new MemoryStream(data))
        using (MemoryStream output = new MemoryStream())
        {
            input.CompressTo(output);
            byte[] packed = output.ToArray();
            return packed;
        }
    }

    /// <summary>
    /// GZip-decompresses a byte array.
    /// </summary>
    /// <param name="data">The bytes to decompress.</param>
    /// <returns>The decompressed bytes.</returns>
    public static byte[] Decompress(this byte[] data)
    {
        using (MemoryStream input = new MemoryStream(data))
        using (MemoryStream output = new MemoryStream())
        {
            input.DecompressTo(output);
            byte[] plain = output.ToArray();
            return plain;
        }
    }

    /// <summary>
    /// Reads <paramref name="stream"/> to its end and writes the GZip-compressed data to
    /// <paramref name="outputStream"/>.
    /// </summary>
    /// <param name="stream">Source of the uncompressed data.</param>
    /// <param name="outputStream">Destination of the compressed data.</param>
    public static void CompressTo(this Stream stream, Stream outputStream)
    {
        GZipStream compressor = new GZipStream(outputStream, CompressionMode.Compress);
        try
        {
            stream.CopyTo(compressor);
            compressor.Flush();
        }
        finally
        {
            // Disposing the compressor completes the GZip data.
            compressor.Dispose();
        }
    }

    /// <summary>
    /// Reads GZip data from <paramref name="stream"/> and writes the decompressed bytes to
    /// <paramref name="outputStream"/>.
    /// </summary>
    /// <param name="stream">Source of the compressed data.</param>
    /// <param name="outputStream">Destination of the decompressed data.</param>
    public static void DecompressTo(this Stream stream, Stream outputStream)
    {
        GZipStream decompressor = new GZipStream(stream, CompressionMode.Decompress);
        try
        {
            decompressor.CopyTo(outputStream);
        }
        finally
        {
            decompressor.Dispose();
        }
    }
}

我们可以通过使用 StreamReader 和 StreamWriter 而不是手动将字符串转换为字节数组来降低代码复杂性。你只需要三个流:

    /// <summary>
    /// Writes <paramref name="uncompressed"/> through a UTF-8 <see cref="StreamWriter"/> into a
    /// <see cref="GZipStream"/> and returns the compressed bytes.
    /// </summary>
    /// <param name="uncompressed">The text to compress.</param>
    /// <returns>The GZip-compressed bytes.</returns>
    public static byte[] Zip(string uncompressed)
    {
        using (MemoryStream target = new MemoryStream())
        {
            using (GZipStream gzip = new GZipStream(target, CompressionLevel.Optimal))
            using (StreamWriter writer = new StreamWriter(gzip, Encoding.UTF8))
            {
                writer.Write(uncompressed);
            }

            // Writer and GZipStream are disposed here, so the compressed data is complete.
            return target.ToArray();
        }
    }


/// <summary>
/// Decompresses GZip data and reads the result as UTF-8 text through a
/// <see cref="StreamReader"/>.
/// </summary>
/// <param name="compressed">The GZip-compressed bytes.</param>
/// <returns>The decompressed text.</returns>
public static string Unzip(byte[] compressed)
{
    using (MemoryStream source = new MemoryStream(compressed))
    using (GZipStream gzip = new GZipStream(source, CompressionMode.Decompress))
    using (StreamReader reader = new StreamReader(gzip, Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}

对于 .NET 6,使用 SharpZipLib 库进行跨平台的 C# 字符串压缩/解压缩。已在 Ubuntu(18.0.x)和 Windows 上测试。

#region helper


/// <summary>
/// GZip-compresses <paramref name="text"/>, written as UTF-8 through a <see cref="StreamWriter"/>.
/// </summary>
/// <param name="text">The text to compress; may be null.</param>
/// <returns>The compressed bytes, or null when <paramref name="text"/> is null.</returns>
private byte[] Zip(string text)
{
    if (text != null)
    {
        using (MemoryStream target = new MemoryStream())
        {
            using (GZipStream gzip = new GZipStream(target, CompressionLevel.Optimal))
            using (StreamWriter writer = new StreamWriter(gzip, Encoding.UTF8))
            {
                writer.Write(text);
            }

            // Writer and GZipStream are disposed here, so the compressed data is complete.
            return target.ToArray();
        }
    }

    return null;
}


/// <summary>
/// Decompresses GZip data and reads the result as UTF-8 text.
/// </summary>
/// <param name="bytes">The GZip-compressed bytes; may be null.</param>
/// <returns>The decompressed text, or null when <paramref name="bytes"/> is null.</returns>
private string Unzip(byte[] bytes)
{
    // Mirror Zip(), which maps null to null, so that a null value survives a
    // Zip/Unzip round trip instead of throwing.
    if (bytes == null)
    {
        return null;
    }

    using (var inputMemory = new MemoryStream(bytes))
    using (var gz = new GZipStream(inputMemory, CompressionMode.Decompress))
    using (var sr = new StreamReader(gz, Encoding.UTF8))
    {
        return sr.ReadToEnd();
    }
}
#endregion