You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

326 lines
12 KiB
C#

1 month ago
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using WorkData.Entity;
#region 文件说明
/*-----------------------------------------------------
* c 2024 jdfcd
* CLR 4.0.30319.42000
* DQ.Construction.NewLook.Utility
*
* pnpe
* pnpe@qq.com
* 2024/12/31 10:38:45
*/
#endregion 文件说明
namespace InterfaceWorkAreaData.LoadCurveData
{
public class LasHelper
{
/// <summary>
/// Reads the curve section and the ASCII data section of a LAS file into
/// <paramref name="table"/>.
/// </summary>
/// <remarks>
/// Everything before the first line starting with "~C" is ignored. Lines between
/// "~C" and "~A" are treated as column definitions: colons are removed and the text
/// before the first '.' (trimmed, upper-cased) becomes the column name. Lines after
/// "~A" are data rows; a row whose field count differs from the column count is
/// skipped silently.
/// NOTE(review): any non-comment line between "~C" and "~A" is taken as a column
/// name, including the header line of another section — confirm the input files
/// never place a section between the two.
/// </remarks>
/// <param name="table">Table to fill. Its columns are cleared and rebuilt from the file; rows are appended.</param>
/// <param name="filename">Path of the LAS file.</param>
/// <param name="charset">Encoding name handed to <see cref="Encoding.GetEncoding(string)"/>.</param>
/// <param name="rowindex">Not used by this method; kept for caller compatibility.</param>
/// <returns>
/// A result whose Code is 1 on success and 0 on failure (no columns, duplicate
/// column name, or any exception), with Msg describing the failure.
/// </returns>
public static ResultDataInfo LasFileToDataTable(ref DataTable table, string filename, string charset, int rowindex)
{
    ResultDataInfo result = new ResultDataInfo();
    List<string> columnNames = new List<string>();

    // Loop-invariant separator sets, allocated once instead of once per data line.
    string[] blankSeparators = { "\r", "\t", "\0", " " };
    string[] fieldSeparators = { "\r", "\t", "\0", ",", ";", " " };

    try
    {
        long length = new FileInfo(filename).Length;

        // Map the file read-only and unnamed: the default overload opens it
        // ReadWrite, which fails on read-only files, and the map is never shared
        // so it needs no name.
        using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(filename, FileMode.Open, null, 0, MemoryMappedFileAccess.Read))
        using (MemoryMappedViewStream stream = mmf.CreateViewStream(0, length, MemoryMappedFileAccess.Read))
        using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(charset)))
        {
            bool isReadHeader = false;
            bool isReadBody = false;
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                // "~C" also covers the long form "~Curve".
                if (line.StartsWith("~C"))
                {
                    isReadHeader = true;
                    continue;
                }

                if (!isReadHeader)
                {
                    continue;
                }

                // "~A" also covers the long form "~Ascii".
                if (line.StartsWith("~A"))
                {
                    isReadBody = true;
                    table.Columns.Clear();
                    if (columnNames.Count == 0)
                    {
                        result.Code = 0;
                        result.Msg = $"文件{filename}列读取失败,已跳过文件,{Environment.NewLine}";
                        return result;
                    }

                    // The curve section is complete: publish the collected columns.
                    foreach (string name in columnNames)
                    {
                        table.Columns.Add(name);
                    }
                    continue;
                }

                if (!isReadBody)
                {
                    // Column definition: drop colons, keep the text before the first '.'.
                    string colName = line.Replace(":", string.Empty).Split('.')[0];
                    if (colName.StartsWith("#"))
                    {
                        continue;
                    }

                    colName = colName.Trim().ToUpper();
                    if (columnNames.Contains(colName))
                    {
                        // Duplicate column names make the file unusable.
                        table.Columns.Clear();
                        result.Code = 0;
                        result.Msg = $"文件{filename}存在重复列名{colName},已跳过文件,{Environment.NewLine}";
                        return result;
                    }

                    columnNames.Add(colName);
                    continue;
                }

                // Data row: collapse runs of blanks to a single ',' and then split
                // on every accepted field separator.
                string normalized = string.Join(",", line.Split(blankSeparators, StringSplitOptions.RemoveEmptyEntries));
                string[] fields = normalized.Split(fieldSeparators, StringSplitOptions.None);
                if (fields.Length != columnNames.Count)
                {
                    continue;
                }

                table.Rows.Add(fields);
            }
        }

        // Only reached when no exception was thrown. Previously this assignment
        // sat after the catch block and overwrote the failure code.
        result.Code = 1;
    }
    catch (Exception e)
    {
        result.Code = 0;
        result.Msg = e.Message;
        Console.WriteLine(e.ToString());
    }

    return result;
}
/// <summary>
/// Lazily reads a LAS file and yields one <see cref="WellCurve"/> per data row.
/// </summary>
/// <remarks>
/// Everything before the first line starting with "~C" is ignored. Lines between
/// "~C" and "~A" define the columns (colons removed, text before the first '.',
/// trimmed and upper-cased). After "~A" each row whose field count matches the
/// column count and whose first field parses as a number is yielded; other rows
/// are skipped. Nothing is yielded when the file does not exist or is empty.
/// Because this is an iterator, the file is only opened once enumeration starts.
/// </remarks>
/// <param name="jh">Value copied into the JH property of every yielded item.</param>
/// <param name="filename">Path of the LAS file.</param>
/// <param name="charset">Encoding name handed to <see cref="Encoding.GetEncoding(string)"/>.</param>
/// <returns>A sequence with the first column as DEPTH and the remaining columns in ColumnValues.</returns>
/// <exception cref="Exception">The "~A" line is reached with no columns, or a column name is duplicated.</exception>
public static IEnumerable<WellCurve> LasFileToList(string jh, string filename, string charset)
{
    if (!File.Exists(filename))
    {
        yield break;
    }

    // A zero-length file cannot be memory-mapped (the API throws) and has no rows.
    long length = new FileInfo(filename).Length;
    if (length == 0)
    {
        yield break;
    }

    string[] blankSeparators = { "\r", "\t", "\0", " " };
    string[] fieldSeparators = { "\r", "\t", "\0", ",", ";", " " };

    // Map the file read-only and unnamed: the default overload opens it ReadWrite,
    // which fails on read-only files, and the map is never shared so it needs no name.
    using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(filename, FileMode.Open, null, 0, MemoryMappedFileAccess.Read))
    using (MemoryMappedViewStream stream = mmf.CreateViewStream(0, length, MemoryMappedFileAccess.Read))
    using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(charset)))
    {
        List<string> columnNames = new List<string>();
        bool isReadHeader = false;
        bool isReadBody = false;
        string line;

        while ((line = reader.ReadLine()) != null)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            // "~C" also covers the long form "~Curve".
            if (line.StartsWith("~C"))
            {
                isReadHeader = true;
                continue;
            }

            if (!isReadHeader)
            {
                continue;
            }

            // "~A" also covers the long form "~Ascii".
            if (line.StartsWith("~A"))
            {
                isReadBody = true;
                if (columnNames.Count == 0)
                {
                    throw new Exception($"文件{filename} 列名为空!");
                }
                continue;
            }

            if (!isReadBody)
            {
                // Column definition: drop colons, keep the text before the first '.'.
                string colName = line.Replace(":", string.Empty).Split('.')[0];
                if (colName.StartsWith("#"))
                {
                    continue;
                }

                colName = colName.Trim().ToUpper();
                if (columnNames.Contains(colName))
                {
                    throw new Exception($"文件{filename} 存在重复列名:{colName}");
                }

                columnNames.Add(colName);
                continue;
            }

            // Data row: collapse runs of blanks to a single ',' and then split on
            // every accepted field separator.
            string normalized = string.Join(",", line.Split(blankSeparators, StringSplitOptions.RemoveEmptyEntries));
            string[] fields = normalized.Split(fieldSeparators, StringSplitOptions.None);
            if (fields.Length != columnNames.Count)
            {
                continue;
            }

            // ',' is a field separator above, so '.' is the only possible decimal
            // mark. Parse with the invariant culture so the result does not depend
            // on the machine's regional settings.
            if (!double.TryParse(
                    fields[0],
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out double depth))
            {
                continue;
            }

            // Column 0 is the depth itself; the rest are stored by column name.
            var values = new Dictionary<string, string>();
            for (int i = 1; i < fields.Length; i++)
            {
                values[columnNames[i]] = fields[i];
            }

            yield return new WellCurve
            {
                JH = jh,
                DEPTH = depth,
                ColumnValues = values,
            };
        }
    }
}
/// <summary>
/// Opens the file at the given path for reading and detects its text encoding
/// from its bytes.
/// </summary>
/// <param name="fileName">Path of the file to inspect.</param>
/// <returns>The encoding reported by the <see cref="FileStream"/> overload.</returns>
public static System.Text.Encoding GetType(string fileName)
{
    // using guarantees the handle is released even when detection throws.
    using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
    {
        return GetType(fs);
    }
}
/// <summary>
/// Reads the stream's bytes and detects the text encoding from them.
/// The stream is closed when this method returns.
/// </summary>
/// <param name="fs">Open, readable file stream; reading starts at its current position.</param>
/// <returns>The encoding reported by the byte-array overload.</returns>
public static System.Text.Encoding GetType(FileStream fs)
{
    // Disposing the reader also closes fs, as the original r.Close() did, and now
    // does so on the exception path too.
    using (BinaryReader reader = new BinaryReader(fs, System.Text.Encoding.Default))
    {
        // A length that does not fit in an int results in zero bytes being read,
        // matching the previous parse-or-zero behaviour.
        int count = fs.Length <= int.MaxValue ? (int)fs.Length : 0;
        byte[] content = reader.ReadBytes(count);
        return GetType(content);
    }
}
/// <summary>
/// Detects the text encoding of a byte buffer from its byte-order mark or, when
/// there is none, from whether the bytes form valid UTF-8.
/// </summary>
/// <param name="data">Raw file content.</param>
/// <returns>
/// <see cref="Encoding.UTF8"/> for a UTF-8 BOM or content that passes the UTF-8
/// check (including an empty buffer), <see cref="Encoding.BigEndianUnicode"/> or
/// <see cref="Encoding.Unicode"/> for a UTF-16 BOM, otherwise
/// <see cref="Encoding.Default"/>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public static System.Text.Encoding GetType(byte[] data)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    // BOM checks come first and are length-guarded, so short buffers no longer
    // index out of range.
    if (data.Length >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF)
    {
        return Encoding.UTF8;
    }

    if (data.Length >= 2)
    {
        // A UTF-16 BOM is two bytes long; the byte after it is content and must
        // not take part in the match.
        if (data[0] == 0xFE && data[1] == 0xFF)
        {
            return Encoding.BigEndianUnicode;
        }

        if (data[0] == 0xFF && data[1] == 0xFE)
        {
            return Encoding.Unicode;
        }
    }

    return IsUTF8Bytes(data) ? Encoding.UTF8 : Encoding.Default;
}
/// <summary>
/// Checks whether the bytes are structurally valid UTF-8: every lead byte
/// announces 2 to 6 bytes and is followed by that many continuation bytes.
/// </summary>
/// <param name="data">Bytes to inspect.</param>
/// <returns>
/// True when every sequence is complete and well-formed (an empty or pure-ASCII
/// buffer also returns true); false otherwise, including when the buffer ends in
/// the middle of a multi-byte sequence.
/// </returns>
private static bool IsUTF8Bytes(byte[] data)
{
    // Number of bytes still expected for the current character, counting the
    // byte being examined; 1 means "at a character boundary".
    int charByteCounter = 1;

    for (int i = 0; i < data.Length; i++)
    {
        byte curByte = data[i];

        if (charByteCounter == 1)
        {
            if (curByte >= 0x80)
            {
                // Count the leading 1 bits of the lead byte: 110xxxxx -> 2, 1110xxxx -> 3, ...
                while (((curByte <<= 1) & 0x80) != 0)
                {
                    charByteCounter++;
                }

                // A lone 10xxxxxx byte (count 1) or more than six leading ones is not a lead byte.
                if (charByteCounter == 1 || charByteCounter > 6)
                {
                    return false;
                }
            }
        }
        else
        {
            // Continuation bytes must look like 10xxxxxx.
            if ((curByte & 0xC0) != 0x80)
            {
                return false;
            }

            charByteCounter--;
        }
    }

    // A sequence cut off at the end of the buffer means the data is not valid
    // UTF-8. Report that instead of throwing, so a predicate never fails its caller.
    return charByteCounter == 1;
}
}
/// <summary>
/// Outcome of an operation: a status code, a message and an optional payload.
/// </summary>
public class ResultDataInfo
{
/// <summary>Status code; LasHelper in this file sets 1 for success and 0 for failure.</summary>
public int Code { get; set; }
/// <summary>Message text; LasHelper in this file fills it with the failure description.</summary>
public string Msg { get; set; }
/// <summary>Optional payload; nothing in this file assigns it.</summary>
public object Data { get; set; }
}
}