kev/Drawer/UCDraw/InterfaceWorkArea/LasHelper.cs

using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using WorkData.Entity;

#region 文件说明
/*-----------------------------------------------------
* Copyright (c) 2024 jdfcd
* CLR version: 4.0.30319.42000
* Namespace: InterfaceWorkAreaData.LoadCurveData
* 
* Author: pnpe
* Email: pnpe@qq.com
* Created: 2024/12/31 10:38:45
*/
#endregion 文件说明
namespace InterfaceWorkAreaData.LoadCurveData
{
    public class LasHelper
    {
        /// <summary>
        /// Reads a LAS file into <paramref name="table"/>.
        /// </summary>
        /// <remarks>
        /// Everything before the first "~C" (curve) line is ignored. Inside the curve section each
        /// line contributes one column name: the text before the first '.', with ':' removed,
        /// trimmed and upper-cased; lines starting with '#' are skipped. Any other "~" section line
        /// ends the curve section, so its lines are not mistaken for column names. When the "~A"
        /// (ASCII data) line is reached the table's columns are cleared and rebuilt from the
        /// collected names; every later line whose value count equals the column count is added as
        /// a row of strings, and lines with a different count are skipped.
        /// </remarks>
        /// <param name="table">Table to fill; its columns are replaced once the data section starts.</param>
        /// <param name="filename">Path of the LAS file to read.</param>
        /// <param name="charset">Encoding name handed to <see cref="Encoding.GetEncoding(string)"/>.</param>
        /// <param name="rowindex">Not used by this method; kept so existing callers keep compiling.</param>
        /// <returns>
        /// A result whose <c>Code</c> is 1 on success, or 0 with <c>Msg</c> set when no column was
        /// found, a column name is duplicated, or any exception occurred while reading.
        /// </returns>
        public static ResultDataInfo LasFileToDataTable(ref DataTable table, string filename, string charset, int rowindex)
        {
            ResultDataInfo result = new ResultDataInfo();
            List<string> columnNames = new List<string>();

            // The separator sets never change, so build them once instead of once per line.
            string[] whitespaceSeparators = { "\r", "\t", "\0", " " };
            string[] valueSeparators = { "\r", "\t", "\0", ",", ";", " " };

            try
            {
                // A plain read-only stream is enough for sequential reading. The previous named
                // memory-mapped file required write access to the file, rejected empty files and
                // is not available on non-Windows platforms.
                using (FileStream stream = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(charset)))
                {
                    bool isReadHeader = false;    // a "~C" line has been seen
                    bool inCurveSection = false;  // currently between "~C" and the next "~" section line
                    bool isReadBody = false;      // the "~A" line has been seen

                    string str;
                    while ((str = reader.ReadLine()) != null)
                    {
                        if (string.IsNullOrWhiteSpace(str))
                        {
                            continue;
                        }

                        if (str.StartsWith("~C", StringComparison.Ordinal))
                        {
                            isReadHeader = true;
                            inCurveSection = !isReadBody;
                            continue;
                        }

                        if (!isReadHeader)
                        {
                            continue;
                        }

                        if (str.StartsWith("~A", StringComparison.Ordinal))
                        {
                            isReadBody = true;
                            inCurveSection = false;
                            table.Columns.Clear();
                            if (columnNames.Count == 0)
                            {
                                result.Code = 0;
                                result.Msg = $"文件{filename}列读取失败,已跳过文件,{Environment.NewLine}";
                                return result;
                            }

                            // The header is complete: create one column per curve name.
                            foreach (string name in columnNames)
                            {
                                table.Columns.Add(name);
                            }
                            continue;
                        }

                        if (!isReadBody)
                        {
                            if (str[0] == '~')
                            {
                                // Another section (parameters, other, ...) started: stop collecting names.
                                inCurveSection = false;
                                continue;
                            }

                            if (!inCurveSection)
                            {
                                continue;
                            }

                            // Trim before the comment test so indented "#" lines are skipped as well.
                            string colName = str.Replace(":", string.Empty).Split('.')[0].Trim();
                            if (colName.StartsWith("#", StringComparison.Ordinal))
                            {
                                continue;
                            }

                            colName = colName.ToUpperInvariant();
                            if (columnNames.Contains(colName))
                            {
                                // Duplicate column names make the file unusable.
                                table.Columns.Clear();
                                result.Code = 0;
                                result.Msg = $"文件{filename}存在重复列名{colName},已跳过文件,{Environment.NewLine}";
                                return result;
                            }

                            columnNames.Add(colName);
                            continue;
                        }

                        // Data row: collapse whitespace runs, then split on the value separators.
                        string joined = string.Join(",", str.Split(whitespaceSeparators, StringSplitOptions.RemoveEmptyEntries));
                        string[] array = joined.Split(valueSeparators, StringSplitOptions.None);
                        if (array.Length != columnNames.Count)
                        {
                            continue;
                        }

                        table.Rows.Add(array);
                    }
                }

                // Only reached when reading finished without an exception; previously this
                // assignment sat after the catch block and turned every failure into success.
                result.Code = 1;
            }
            catch (Exception e)
            {
                result.Code = 0;
                result.Msg = e.Message;
                Console.WriteLine(e.ToString());
            }

            return result;
        }

        /// <summary>
        /// Lazily reads a LAS file and yields one <see cref="WellCurve"/> per data row.
        /// </summary>
        /// <remarks>
        /// Nothing is read until the returned sequence is enumerated. Column names come from the
        /// "~C" (curve) section: the text before the first '.', with ':' removed, trimmed and
        /// upper-cased; lines starting with '#' are skipped and any other "~" section line ends the
        /// curve section. After the "~A" (ASCII data) line, each line whose value count equals the
        /// column count and whose first value parses as a number yields an item; the first value
        /// becomes <c>DEPTH</c> and the remaining values are stored in <c>ColumnValues</c> keyed by
        /// column name. Other lines are skipped. A missing file yields an empty sequence.
        /// </remarks>
        /// <param name="jh">Value copied into <c>JH</c> of every yielded item.</param>
        /// <param name="filename">Path of the LAS file to read.</param>
        /// <param name="charset">Encoding name handed to <see cref="Encoding.GetEncoding(string)"/>.</param>
        /// <returns>A lazily evaluated sequence of parsed rows.</returns>
        /// <exception cref="InvalidDataException">
        /// The data section starts before any column name was read, or a column name is duplicated.
        /// </exception>
        public static IEnumerable<WellCurve> LasFileToList(string jh, string filename, string charset)
        {
            if (!File.Exists(filename))
            {
                yield break;
            }

            // The separator sets never change, so build them once instead of once per line.
            string[] whitespaceSeparators = { "\r", "\t", "\0", " " };
            string[] valueSeparators = { "\r", "\t", "\0", ",", ";", " " };

            // A plain read-only stream is enough for sequential reading. The previous named
            // memory-mapped file required write access to the file, rejected empty files and
            // is not available on non-Windows platforms.
            using (FileStream stream = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(charset)))
            {
                List<string> columnNames = new List<string>();
                bool isReadHeader = false;    // a "~C" line has been seen
                bool inCurveSection = false;  // currently between "~C" and the next "~" section line
                bool isReadBody = false;      // the "~A" line has been seen

                string str;
                while ((str = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrWhiteSpace(str))
                    {
                        continue;
                    }

                    if (str.StartsWith("~C", StringComparison.Ordinal))
                    {
                        isReadHeader = true;
                        inCurveSection = !isReadBody;
                        continue;
                    }

                    if (!isReadHeader)
                    {
                        continue;
                    }

                    if (str.StartsWith("~A", StringComparison.Ordinal))
                    {
                        isReadBody = true;
                        inCurveSection = false;
                        if (columnNames.Count == 0)
                        {
                            throw new InvalidDataException($"文件{filename} 列名为空！");
                        }
                        continue;
                    }

                    if (!isReadBody)
                    {
                        if (str[0] == '~')
                        {
                            // Another section (parameters, other, ...) started: stop collecting names.
                            inCurveSection = false;
                            continue;
                        }

                        if (!inCurveSection)
                        {
                            continue;
                        }

                        // Trim before the comment test so indented "#" lines are skipped as well.
                        string colName = str.Replace(":", string.Empty).Split('.')[0].Trim();
                        if (colName.StartsWith("#", StringComparison.Ordinal))
                        {
                            continue;
                        }

                        colName = colName.ToUpperInvariant();
                        if (columnNames.Contains(colName))
                        {
                            throw new InvalidDataException($"文件{filename} 存在重复列名：{colName}");
                        }

                        columnNames.Add(colName);
                        continue;
                    }

                    // Data row: collapse whitespace runs, then split on the value separators.
                    string joined = string.Join(",", str.Split(whitespaceSeparators, StringSplitOptions.RemoveEmptyEntries));
                    string[] array = joined.Split(valueSeparators, StringSplitOptions.None);
                    if (array.Length != columnNames.Count)
                    {
                        continue;
                    }

                    // Parse with the invariant culture: the file uses '.' as the decimal separator
                    // regardless of the regional settings of the machine reading it.
                    if (!double.TryParse(
                            array[0],
                            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out double depth))
                    {
                        continue;
                    }

                    // Column 0 is the depth itself, so the per-column values start at index 1.
                    var dic = new Dictionary<string, string>();
                    for (int i = 1; i < array.Length; i++)
                    {
                        dic[columnNames[i]] = array[i];
                    }

                    yield return new WellCurve
                    {
                        JH = jh,
                        DEPTH = depth,
                        ColumnValues = dic,
                    };
                }
            }
        }


        /// <summary>
        /// Opens the file at the given path for reading and guesses its text encoding from its bytes.
        /// </summary>
        /// <param name="fileName">Path of the file to inspect.</param>
        /// <returns>The encoding reported by the <see cref="GetType(FileStream)"/> overload.</returns>
        public static System.Text.Encoding GetType(string fileName)
        {
            // "using" releases the file handle even when detection throws; the previous explicit
            // Close() call was skipped on any exception.
            using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
            {
                return GetType(fs);
            }
        }

        /// <summary>
        /// Reads up to <c>fs.Length</c> bytes from the stream's current position and guesses the
        /// text encoding of those bytes.
        /// </summary>
        /// <remarks>
        /// The stream is closed when this method returns, whether it succeeds or throws.
        /// </remarks>
        /// <param name="fs">Open, readable file stream; it is closed by this call.</param>
        /// <returns>The encoding reported by the <see cref="GetType(byte[])"/> overload.</returns>
        public static System.Text.Encoding GetType(FileStream fs)
        {
            // Disposing the reader also closes fs, exactly like the former r.Close() call did,
            // but now this happens on the exception path too.
            using (BinaryReader r = new BinaryReader(fs, System.Text.Encoding.Default))
            {
                // A byte[] cannot hold more than int.MaxValue bytes. As before, a longer stream
                // is inspected as if it were empty rather than raising an error.
                long length = fs.Length;
                int count = length <= int.MaxValue ? (int)length : 0;

                byte[] ss = r.ReadBytes(count);
                return GetType(ss);
            }
        }


        /// <summary>
        /// Guesses the text encoding of a byte buffer.
        /// </summary>
        /// <remarks>
        /// Byte-order marks are checked first (UTF-8, UTF-32 little-endian, UTF-16 big- and
        /// little-endian). Without a BOM, the buffer is reported as UTF-8 when
        /// <c>IsUTF8Bytes</c> accepts it (which includes empty and pure-ASCII input); otherwise
        /// <see cref="Encoding.Default"/> is returned.
        /// </remarks>
        /// <param name="data">Bytes to inspect.</param>
        /// <returns>The detected encoding, or <see cref="Encoding.Default"/> as the fallback.</returns>
        public static System.Text.Encoding GetType(byte[] data)
        {
            // Every index access is guarded by a length check, so short buffers no longer throw.
            if (data.Length >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF)
            {
                return Encoding.UTF8;
            }

            // FF FE 00 00 is the UTF-32 LE BOM; test it before the two-byte UTF-16 LE BOM it starts with.
            if (data.Length >= 4 && data[0] == 0xFF && data[1] == 0xFE && data[2] == 0x00 && data[3] == 0x00)
            {
                return Encoding.UTF32;
            }

            if (data.Length >= 2)
            {
                // A UTF-16 BOM is two bytes long. The former checks also demanded a particular
                // third byte, so most UTF-16 files fell through to the default encoding.
                if (data[0] == 0xFE && data[1] == 0xFF)
                {
                    return Encoding.BigEndianUnicode;
                }

                if (data[0] == 0xFF && data[1] == 0xFE)
                {
                    return Encoding.Unicode;
                }
            }

            return IsUTF8Bytes(data) ? Encoding.UTF8 : Encoding.Default;
        }
        /// <summary>
        /// Checks whether a byte buffer is structurally valid UTF-8 (no BOM required).
        /// </summary>
        /// <remarks>
        /// Only the lead-byte / continuation-byte structure is verified. Lead bytes announcing
        /// sequences of two to six bytes are accepted, as in the original implementation. Empty
        /// and pure-ASCII buffers are reported as UTF-8. A buffer that ends in the middle of a
        /// multi-byte sequence is reported as not UTF-8 instead of raising an exception.
        /// </remarks>
        /// <param name="data">Bytes to inspect.</param>
        /// <returns><c>true</c> when every byte fits the UTF-8 structure; otherwise <c>false</c>.</returns>
        private static bool IsUTF8Bytes(byte[] data)
        {
            // Number of continuation bytes (10xxxxxx) still expected for the current character.
            int pendingContinuations = 0;

            foreach (byte current in data)
            {
                if (pendingContinuations > 0)
                {
                    if ((current & 0xC0) != 0x80)
                    {
                        return false;
                    }

                    pendingContinuations--;
                    continue;
                }

                if (current < 0x80)
                {
                    // Single-byte (ASCII) character.
                    continue;
                }

                // Count the leading 1 bits of the lead byte: 110xxxxx -> 2, 1110xxxx -> 3, ...
                int leadingOnes = 0;
                for (int mask = 0x80; (current & mask) != 0; mask >>= 1)
                {
                    leadingOnes++;
                }

                // One leading 1 is a stray continuation byte; more than six is not a lead byte.
                if (leadingOnes < 2 || leadingOnes > 6)
                {
                    return false;
                }

                pendingContinuations = leadingOnes - 1;
            }

            // A sequence cut off at the end of the buffer means the data is not valid UTF-8.
            return pendingContinuations == 0;
        }
    }

    /// <summary>
    /// Outcome of an operation: a status code, an optional message and an optional payload.
    /// </summary>
    public class ResultDataInfo
    {
        private int _code;
        private string _msg;
        private object _data;

        /// <summary>
        /// Status code. <c>LasHelper.LasFileToDataTable</c> uses 1 for success and 0 for failure.
        /// </summary>
        public int Code
        {
            get { return _code; }
            set { _code = value; }
        }

        /// <summary>
        /// Human-readable message; <c>LasHelper.LasFileToDataTable</c> fills it when reporting a failure.
        /// </summary>
        public string Msg
        {
            get { return _msg; }
            set { _msg = value; }
        }

        /// <summary>
        /// Optional payload attached by the producer of the result.
        /// </summary>
        public object Data
        {
            get { return _data; }
            set { _data = value; }
        }
    }
}