代码之家  ›  专栏  ›  技术社区  ›  user320587

规范化excel电子表格数据(列到行)

  •  0
  • user320587  · 技术社区  · 12 年前

    我正在尝试使用Microsoft Interop excel对象来规范excel电子表格中的数据。基本上,我需要从某个列偏移量开始将列转换为行。

    Original Data:
    ColumnA ColumnB ColumnC ColumnD ColumnE ColumnF
       X       Y      10      20      30      40
    
    Normalized Data:
    ColumnA ColumnB NewColumn Value
      X        Y     ColumnC   10
      X        Y     ColumnD   20
      X        Y     ColumnE   30
      X        Y     ColumnF   40
    

    我的功能按预期工作。然而,运行时间非常慢。所以,我想知道,如果我使用像OpenXML这样的其他框架,我会看到效率的提高吗

    以下是我使用互操作对象的代码:

    public static void Normalize(string aFilePathName, string aSheetName, int aColOffSet, string aPivotColName, string aValueColName)
    {
      LOG.DebugFormat("Normaling data in file: {0}", aFilePathName);
      LOG.DebugFormat("Sheet Name:{0} ColOffset:{1}", aSheetName, aColOffSet);
    
       Excel.Application vExcel = new Excel.Application();
       Excel.Workbook vWorkbook = null;
       Excel.Worksheet vWsOriginal = null;
       Excel.Worksheet vWsNormalized = null;
       try
         {
            vExcel.Visible = false;
            vWorkbook = vExcel.Workbooks.Open(aFilePathName, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value);
            vWsOriginal = vWorkbook.Worksheets[aSheetName];
            string vNormalizedSheetName = string.Format("Normalized {0}", aSheetName);
            bool vNormalizedSheetExists = (vWorkbook.Sheets.Cast<object>()
                                                .Select(sheetValue => sheetValue as Excel.Worksheet))
                                                .Any(wbSheet => wbSheet != null && wbSheet.Name == vNormalizedSheetName);
                if (!vNormalizedSheetExists)
                {
                    vWsNormalized = vWorkbook.Worksheets.Add(vWsOriginal, Type.Missing, Type.Missing, Type.Missing);
                    vWsNormalized.Name = vNormalizedSheetName;
                }
                else
                {
                    vWsNormalized = vWorkbook.Worksheets[vNormalizedSheetName];
                }
                vWsNormalized.UsedRange.ClearContents();
    
                long vTotalColumns = 1;
                long vRowCounter = 1;
                Excel.Range vWsRange = vWsOriginal.Cells[vRowCounter, vTotalColumns];
    
                List<string> vHeaders = new List<string>();
                while (vWsRange.Value2 != null)
                {
                    vHeaders.Add(vWsRange.Value2.ToString());
                    vTotalColumns = vTotalColumns + 1;
                    vWsRange = vWsOriginal.Cells[vRowCounter, vTotalColumns];
                }
    
                // Insert the headers
                for (int vHeaderCol = 1; vHeaderCol < aColOffSet; vHeaderCol++)
                {
                    vWsNormalized.Cells[1, vHeaderCol].Value = vHeaders[vHeaderCol - 1];
                }
                vWsNormalized.Cells[1, aColOffSet].Value = aPivotColName;
                vWsNormalized.Cells[1, aColOffSet + 1].Value = aValueColName;
    
                long vNewRow = 2;
    
                for (int vCol = aColOffSet; vCol < vTotalColumns; vCol++)
                {
                    vRowCounter = 2;
                    while (((Excel.Range)vWsOriginal.Cells[vRowCounter, 1]).Value2 != null)
                    {
                        for (int j = 1; j < aColOffSet; j++)
                        {
                            vWsNormalized.Cells[vNewRow, j] = vWsOriginal.Cells[vRowCounter, j];
                        }
    
                        vWsNormalized.Cells[vNewRow, aColOffSet] = vWsOriginal.Cells[1, vCol];
                        vWsNormalized.Cells[vNewRow, aColOffSet + 1] = vWsOriginal.Cells[vRowCounter, vCol];
    
                        vRowCounter = vRowCounter + 1;
    
                        vNewRow = vNewRow + 1;
                    }
                }
            }
            finally
            {
                vWorkbook.Close(Excel.XlSaveAction.xlSaveChanges, Type.Missing, Type.Missing);
                Marshal.FinalReleaseComObject(vWsNormalized);
                Marshal.FinalReleaseComObject(vWsOriginal);
                Marshal.FinalReleaseComObject(vWorkbook);
                vExcel.Quit();
                Marshal.FinalReleaseComObject(vExcel);
            }
        }
    

    如果有可能提高性能,我愿意尝试任何其他开源框架。

    谢谢

    2 回复  |  直到 12 年前
        1
  •  0
  •   user320587    12 年前

    我能够想出一个更好的实施方案。与其循环遍历每个单元格,不如利用Excel transpose函数进行批量复制。

    public static void Normalize2(string aFilePathName, string aSheetName, int aColOffSet, string aPivotColName, string aValueColName)
        {
            LOG.DebugFormat("Normaling data in file: {0}", aFilePathName);
            LOG.DebugFormat("Sheet Name:{0} ColOffset:{1}", aSheetName, aColOffSet);
    
            Excel.Application vExcel = new Excel.Application();
            Excel.Workbook vWorkbook = null;
            Excel.Worksheet vWsOriginal = null;
            Excel.Worksheet vWsNormalized = null;
            try
            {
                vExcel.Visible = false;
                vWorkbook = vExcel.Workbooks.Open(aFilePathName, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value, Missing.Value);
                vWsOriginal = vWorkbook.Worksheets[aSheetName];
                //vWsOriginal.Name = string.Format("Original_{0}", aSheetName);
                string vNormalizedSheetName = string.Format("Normalized {0}", aSheetName);
                bool vNormalizedSheetExists = (vWorkbook.Sheets.Cast<object>()
                                                .Select(sheetValue => sheetValue as Excel.Worksheet))
                                                .Any(wbSheet => wbSheet != null && wbSheet.Name == vNormalizedSheetName);
                if (!vNormalizedSheetExists)
                {
                    vWsNormalized = vWorkbook.Worksheets.Add(vWsOriginal, Type.Missing, Type.Missing, Type.Missing);
                    vWsNormalized.Name = vNormalizedSheetName;
                }
                else
                {
                    vWsNormalized = vWorkbook.Worksheets[vNormalizedSheetName];
                }
                vWsNormalized.UsedRange.ClearContents();
    
                long vTotalColumns = 1;
                long vRowCounter = 1;
                Excel.Range vWsRange = vWsOriginal.Cells[vRowCounter, vTotalColumns];
    
                List<string> vHeaders = new List<string>();
                while (vWsRange.Value2 != null)
                {
                    vHeaders.Add(vWsRange.Value2.ToString());
                    vTotalColumns = vTotalColumns + 1;
                    vWsRange = vWsOriginal.Cells[vRowCounter, vTotalColumns];
                }
    
                // Insert the headers
                for (int vHeaderCol = 1; vHeaderCol < aColOffSet; vHeaderCol++)
                {
                    vWsNormalized.Cells[1, vHeaderCol].Value = vHeaders[vHeaderCol - 1];
                }
                vWsNormalized.Cells[1, aColOffSet].Value = aPivotColName;
                vWsNormalized.Cells[1, aColOffSet + 1].Value = aValueColName;
    
                long vNewRow = 2;
                long vValueColumns = vTotalColumns - aColOffSet;
                vRowCounter = 2;
    
                Excel.Range vHeaderData = vWsOriginal.Range[vWsOriginal.Cells[1, aColOffSet],
                                                            vWsOriginal.Cells[1, vTotalColumns - 1]];
                string[] vPivotValueNames = new string[vTotalColumns - aColOffSet];
                vHeaders.CopyTo(aColOffSet - 1, vPivotValueNames, 0, (int) (vTotalColumns - aColOffSet));
                while (((Excel.Range)vWsOriginal.Cells[vNewRow, 1]).Value2 != null)
                {
                    Excel.Range vStaticRowData = vWsOriginal.Range[vWsOriginal.Cells[vNewRow, 1],
                                                                       vWsOriginal.Cells[vNewRow, aColOffSet - 1]];
    
                    Excel.Range vDynamicRowData = vWsOriginal.Range[vWsOriginal.Cells[vNewRow, aColOffSet],
                                                                       vWsOriginal.Cells[vNewRow, vTotalColumns - 1]];
    
                    long vDestRowStart = vRowCounter;
                    long vDestRowEnd = (vRowCounter + vValueColumns) - 1;
                    Excel.Range vNormalizedStaticRowData = vWsNormalized.Range[vWsNormalized.Cells[vDestRowStart, 1],
                                                                        vWsNormalized.Cells[vDestRowEnd, aColOffSet - 1]];
                    Excel.Range vNormalizedPivotValueRowData = vWsNormalized.Range[vWsNormalized.Cells[vDestRowStart, aColOffSet],
                                                                        vWsNormalized.Cells[vDestRowEnd, aColOffSet]];
    
                    Excel.Range vNormalizedValueRowData = vWsNormalized.Range[vWsNormalized.Cells[vDestRowStart, aColOffSet + 1],
                                                                        vWsNormalized.Cells[vDestRowEnd, aColOffSet + 1]];
                    vNormalizedStaticRowData.Value = vStaticRowData.Value;
                    vNormalizedPivotValueRowData.Value = vExcel.WorksheetFunction.Transpose(vHeaderData.Value);
                    vNormalizedValueRowData.Value = vExcel.WorksheetFunction.Transpose(vDynamicRowData.Value);
    
                    vNewRow = vNewRow + 1;
                    vRowCounter = vRowCounter + vValueColumns;
                }
            }
            finally
            {
                vWorkbook.Close(Excel.XlSaveAction.xlSaveChanges, Type.Missing, Type.Missing);
                Marshal.FinalReleaseComObject(vWsNormalized);
                Marshal.FinalReleaseComObject(vWsOriginal);
                Marshal.FinalReleaseComObject(vWorkbook);
                vExcel.Quit();
                Marshal.FinalReleaseComObject(vExcel);
            }
        }
    
        2
  •  0
  •   user2943350    11 年前

    我最近不得不做一些类似的事情,并使用透视表向导发现了这个技巧: http://www.launchexcel.com/pivot-table-flatten-crosstab/