11/19/11

Import Excel Sheet to Datatable in VB.Net

Tương tự như Import Excel Sheet to Datatable in C#.Net -> Hướng dẫn làm thế nào để chèn Excel vào CSDL bằng ngôn ngữ VB

View Code:
Imports System
Imports System.Collections.Generic
Imports System.Text
Imports System.Data
Imports System.Data.OleDb
Imports System.ComponentModel

Namespace Vtrung
''' <summary>
''' The classes in this file use oledb to query excel worksheets and thus can be
''' used without the interop assemblies.
''' In other words, you can simply copy , paste and use :D
''' </summary>
''' <remarks></remarks>
Public Class ExcelBase
Inherits Component
Implements IDisposable
#Region "Constructors"
Public Sub New()
UseFinalizer = False
End Sub
Public Sub New(ByVal WorkBook As String)
Me.New()
Me.WorkBook = WorkBook
End Sub
#End Region
#Region "Workbook/range settings"
Private m_workbook As String
''' <summary>
''' The workbook (file) name to query
''' </summary>
<DefaultValue("Nothing")> _
Public Property WorkBook() As String
Get
Return m_workbook
End Get
Set(ByVal value As String)
CloseConnection()
m_workbook = value
determinedrange = Nothing
End Set
End Property
''' <summary>
''' The Range which to query. This can be any Excel range (eg "A1:B5") or
''' just a worksheet name.
''' If this value is null, the first sheet of the <see cref="WorkBook"/> is used
''' </summary>
Private m_range As String
<DefaultValue("Nothing")> _
Public Property Range() As String
Get
Return m_range
End Get
Set(ByVal value As String)
m_range = value
determinedrange = Nothing
End Set
End Property
Private m_worksheetindex As Integer = 0
''' <summary>
''' The 0 based INDEX of the worksheet to query.
''' If you want to set the name of the worksheet, use <see cref="Range"/> instead.
''' NB: if <see cref="Range"/> is set, this property is ignored
''' </summary>
<DefaultValue(0)> _
Public Property WorkSheetIndex() As Integer
Get
Return m_worksheetindex
End Get
Set(ByVal value As Integer)
m_worksheetindex = value
determinedrange = Nothing
End Set
End Property
#Region "Range formatting"
''' <summary>
''' If a range was determined in a previous step, keep it buffered here
''' </summary>
Private determinedrange As String
''' <summary>
''' Gets the properly formatted sheet name
''' if no worksheet was provided, read out sheet information and select everything
''' from the first sheet
''' </summary>
Public Function GetRange() As String
If determinedrange Is Nothing Then
Dim range__1 As String = Range
If range__1 Is Nothing Then
range__1 = DetermineRange()
End If
If range__1.IndexOf(":"c) = -1 AndAlso Not range__1.EndsWith("$") Then
range__1 += "$"
End If
'sheetname has to be appended with a $
determinedrange = "[" & range__1 & "]"
End If
Return determinedrange
End Function

''' <summary>
''' See <see cref="AutoDetermineRange"/> property for more info
''' </summary>
''' <returns></returns>
Private Function DetermineRange() As String
Dim sheet As String = GetSheetName(m_worksheetindex)
If Not m_autodeterminerange Then
Return sheet
End If
Return New RangeFinder(Me, sheet).ToString()
End Function
#Region "RangeFinder"
Private Class RangeFinder
Private da As OleDbDataAdapter
Private dtSchema As DataTable
Private rng As New ExcelDataRange()
Private eb As Import
Private cols As Integer
''' <summary>
''' minimum amount of columns that need to be filled
''' <seealso cref="minfilled"/>
''' </summary>
Private min As Integer
Public Sub New(ByVal Owner As ExcelBase, ByVal sheet As String)
Me.eb = New Import(Owner.WorkBook)
eb.Range = sheet
eb.UseHeaders = False
eb.InterMixedAsText = True
'DataTable dt = eb.Query();
Try
eb.OpenConnection()
'get the number of rows and columns
da = New OleDbDataAdapter("select * from [" & sheet & "]", eb.Connection)
dtSchema = New DataTable()
da.FillSchema(dtSchema, SchemaType.Source)
cols = dtSchema.Columns.Count
Dim rows As Integer = CInt(ExecuteScalar("select count(*) from [" & sheet & "]"))
'fill the range object
rng.From.Row = InlineAssignHelper(rng.From.Column, 1)
rng._To.Row = rows
rng._To.Column = cols

min = CInt((cols * minfilled))
'now rng contains the complete square range of data containing cells
'try to narrow it by getting as much hits as possible
DecreaseRange()
Finally
indexReader.Close()
eb.CloseConnection()
End Try
End Sub
Private Function ExecuteScalar(ByVal sql As String) As Object
Return New OleDbCommand(sql, da.SelectCommand.Connection).ExecuteScalar()
End Function

Private indexquery As String
Private Function GetIndexQuery() As String
If indexquery Is Nothing Then
Dim sql As New StringBuilder("select 0")

For Each dr As DataRow In dtSchema.Rows
Dim colname As String = "[" & dr("column_name").ToString() & "]"
sql.Append("+iif(").Append(colname).Append(" is null,0,1)")
Next
sql.Append(" as ind from ")
indexquery = sql.ToString()
End If
Return indexquery
End Function
'ExcelDataRange indexRange;
Private indexTable As New DataTable()
Private indexReader As OleDbDataReader
Private Function GetIndex() As Integer
If Not Forward Then
indexReader.Close()
indexReader = Nothing
da.SelectCommand.CommandText = String.Format(" select * from {0}:{0}", rng._To.Row)
End If
If indexReader Is Nothing Then
indexReader = da.SelectCommand.ExecuteReader()
End If
Dim cnt As Integer = 0
If Not indexReader.Read() Then
Return -1
End If
For i As Integer = 0 To indexReader.FieldCount - 1
If Not indexReader.IsDBNull(i) Then
cnt += 1
End If
Next
Return cnt
da.TableMappings.Clear()


da = New OleDbDataAdapter(da.SelectCommand.CommandText, eb.conn)
indexTable = New DataTable()
'da.FillSchema(indexTable, SchemaType.Source);
da.Fill(indexTable)
Return indexTable.Columns.Count
End Function
''' <summary>
''' minimum percentage that needs to be filled to count as a datarow
''' </summary>
Const minfilled As Double = 0.75
''' <summary>
''' The amount of subsequent (or preceding) rows that need to be filled a <see cref="minfilled"/> percentage
''' for it to count as a datarow
''' </summary>
Const CheckRows As Integer = 3
''' <summary>
''' Decrease the range step by step
''' The problem is that when obtaining all, a lot more nulls are returned
''' than you would visibly see. That makes most algorithms to get the
''' block useless.
''' this is also why just obtaining the datatable complete and removing the
''' rows will not suffice: the proper field data types will not have been set
''' Best way I could figure without using interop was to increase the start
''' range to see if the avarage filled values increase.
''' </summary>
Private Sub DecreaseRange()
While True
If GetIndex() >= min Then
Dim i As Integer = 0
While i < CheckRows
AlterRange(1)
If GetIndex() < min Then
Exit While
End If
i += 1
End While
If i = CheckRows Then
AlterRange(-i)
If Forward Then
Forward = False
Else
Exit While
End If
End If
End If
If rng.From.Row > rng._To.Row Then
Throw New Exception("Could not determine data range")
End If
AlterRange(1)
End While
End Sub
Private Forward As Boolean = True
Private Sub AlterRange(ByVal i As Integer)
If Forward Then
rng.From.Row += i
Else
rng._To.Row -= i
End If
End Sub

Public Overloads Overrides Function ToString() As String
Return rng.ToString()
End Function
Private Structure ExcelRange
Public Row As Integer, Column As Integer
Public Sub New(ByVal Col As Integer, ByVal Row As Integer)
Me.Column = Col
Me.Row = Row
End Sub
Public Overloads Overrides Function ToString() As String
'return string.Format("R{0}C{1}", Row, Column);
Dim res As String = Row.ToString()
Dim col As Integer = Column
While col > 0
Dim cc As Integer = col Mod 26
Dim c As Char = CChar("A" + ChrW((cc - 1)))
res = c.ToString() + res
col /= 26
End While
Return res
End Function
End Structure
Private Structure ExcelDataRange
Public From As ExcelRange, _To As ExcelRange
Public Overloads Overrides Function ToString() As String
Return GetRange(From, _To)
End Function
Private Shared Function GetRange(ByVal from As ExcelRange, ByVal _To As ExcelRange) As String
Return (from.ToString() & ":") + _To.ToString()
End Function
Public Function TopRow() As String
Return GetRange(From, New ExcelRange(_To.Column, From.Row))
End Function
Public Function BottomRow() As String
Return GetRange(New ExcelRange(From.Column, _To.Row), _To)
End Function
End Structure
Private Shared Function InlineAssignHelper(Of T)(ByRef target As T, ByVal value As T) As T
target = value
Return value
End Function
End Class
#End Region
#End Region

''' <summary>
''' Checks if the <see cref="WorkBook"/> exists
''' </summary>
Public ReadOnly Property WorkBookExists() As Boolean
Get
Return System.IO.File.Exists(WorkBook)
End Get
End Property
''' <summary>
''' Checks if the workbook exists and throws an exception if it doesn't
''' <seealso cref="WorkBookExists"/>
''' </summary>
Protected Sub CheckWorkbook()
If Not WorkBookExists Then
Throw New System.IO.FileNotFoundException("Workbook not found", WorkBook)
End If
End Sub
#End Region
#Region "Connection"
''' <summary>
''' Creates a NEW connection. If this method is called directly, this
''' class will not check if it is closed.
''' To get a handled connection, use the <see cref="Connection"/> property.
''' </summary>
''' <returns></returns>
Public Function CreateConnection() As OleDbConnection
CheckWorkbook()
Return New OleDbConnection(String.Format("Provider=Microsoft. Jet.OLEDB.4.0;" & "Data Source={0};Extended Properties='Excel 8.0;HDR={1};Imex={2}'", WorkBook, IIf(m_useheaders, "Yes", "No"), IIf(imex, "1", "0")))
End Function
Private m_useheaders As Boolean = True
''' <summary>
''' Determines if the first row in the specified <see cref="Range"/> contains the headers
''' </summary>
<DefaultValue(True)> _
Public Property UseHeaders() As Boolean
Get
Return m_useheaders
End Get
Set(ByVal value As Boolean)
If m_useheaders <> value Then
CloseConnection()
m_useheaders = value
End If
End Set
End Property
Private imex As Boolean
''' <summary>
''' if this value is <c>true</c>, 'intermixed' data columns are handled as text (otherwise Excel tries to make a calcuated guess on what the datatype should be)
''' </summary>
<DefaultValue(False)> _
Public Property InterMixedAsText() As Boolean
Get
Return imex
End Get
Set(ByVal value As Boolean)
If imex <> value Then
CloseConnection()
imex = value
End If
End Set
End Property
Private m_autodeterminerange As Boolean
''' <summary>
''' Tries to obtain the range automatically by looking for a large chunk of data. Use this value if there's a lot of
''' static around the actual data.
''' Beware though: this takes some additional steps and can cause performance loss
''' when querying larger files.
''' automatically determening the range is not fullproof. Be sure to check the results
''' on first time use.
''' NB: if the <see cref="Range"/> is set, this property is ignored.
''' </summary>
<DefaultValue(False)> _
Public Property AutoDetermineRange() As Boolean
Get
Return m_autodeterminerange
End Get
Set(ByVal value As Boolean)
If m_autodeterminerange <> value Then
m_autodeterminerange = value
determinedrange = Nothing
End If
End Set
End Property
Private conn As OleDbConnection
''' <summary>
''' Gets a connection to the current <see cref="WorkBook"/>
''' When called for the first time (or after changing the workbook)
''' a new connection is created.
''' To close the connection, preferred is the use of <see cref="CloseConnection"/>
''' </summary>
Public ReadOnly Property Connection() As OleDbConnection
Get
If conn Is Nothing Then
conn = CreateConnection()
UseFinalizer = True
End If
Return conn
End Get
End Property
''' <summary>
''' Closes the connection (if open)
''' </summary>
Public Sub CloseConnection()
If conn IsNot Nothing AndAlso ConnectionIsOpen Then
conn.Dispose()
End If
conn = Nothing
UseFinalizer = False
End Sub
Protected Sub CloseConnection(ByVal OnlyIfNoneOpen As Boolean)
If OnlyIfNoneOpen Then
If System.Threading.Interlocked.Decrement(opencount) > 0 OrElse wasopenbeforerememberstate Then
Exit Sub
End If
End If
CloseConnection()
End Sub
''' <summary>
''' Opens the <see cref="Connection"/>
''' </summary>
Public Sub OpenConnection()
OpenConnection(False)
End Sub
Private opencount As Integer
Private wasopenbeforerememberstate As Boolean
Protected Sub OpenConnection(ByVal RememberState As Boolean)
If RememberState AndAlso System.Math.Max(System.Threading.Interlocked.Incre ment(opencount), opencount - 1) = 0 Then
wasopenbeforerememberstate = ConnectionIsOpen
End If
If Not ConnectionIsOpen Then
Connection.Open()
End If
End Sub
Public ReadOnly Property ConnectionIsOpen() As Boolean
Get
Return conn IsNot Nothing AndAlso conn.State <> ConnectionState.Closed
End Get
End Property

#End Region
#Region "IDisposable Members"
Public Overloads Sub Dispose()
CloseConnection()
End Sub
Protected Overrides Sub Finalize()
Try
Dispose()
Finally
MyBase.Finalize()
End Try
End Sub
Private m_usefinalizer As Boolean
Private Property UseFinalizer() As Boolean
Get
Return m_usefinalizer
End Get
Set(ByVal value As Boolean)
If m_usefinalizer = value Then
Exit Property
End If
m_usefinalizer = value
If value Then
GC.ReRegisterForFinalize(Me)
Else
GC.SuppressFinalize(Me)
End If
End Set
End Property
#End Region
#Region "Helper functions"
''' <summary>
''' queries the connection for the sheetnames and returns them
''' </summary>
''' <returns></returns>
Public Function GetSheetNames() As String()
OpenConnection(True)
Try
' Read out sheet information
Dim dt As DataTable = Connection.GetOleDbSchemaTable(OleDbSchemaGuid.Tab les, Nothing)
If dt Is Nothing OrElse dt.Rows.Count = 0 Then
Throw New Exception("Could not get sheet names")
End If

Dim res As String() = New String(dt.Rows.Count - 1) {}
For i As Integer = 0 To res.Length - 1
Dim name As String = dt.Rows(i)("TABLE_NAME").ToString()

If name(0) = "'"c Then
'numeric sheetnames get single quotes around them in the schema.
'remove them here
If System.Text.RegularExpressions.Regex.IsMatch(name, "^'\d\w+\$'$") Then
name = name.Substring(1, name.Length - 2)
End If
End If
res(i) = name
Next
Return res
Finally
CloseConnection(True)
End Try
End Function
''' <summary>
''' Gets the name of the first sheet
''' (this is also the default range used, when no <see cref="Range"/> is specified)
''' </summary>
''' <returns></returns>
Public Function GetFirstSheet() As String
Return GetSheetName(0)
End Function
Public Function GetSheetName(ByVal index As Integer) As String
Dim sheets As String() = GetSheetNames()
If index < 0 OrElse index >= sheets.Length Then
Throw New IndexOutOfRangeException("No worksheet exists at the specified index")
End If
Return sheets(index)
End Function
#End Region
End Class
Public Class Import
Inherits ExcelBase
#Region "Static query procedures"
''' <summary>
''' Imports the first worksheet of the specified file
''' </summary>
''' <param name="File"></param>
Public Shared Function Query(ByVal File As String) As DataTable
Return Query(File, Nothing)
End Function
''' <summary>
''' Imports the specified sheet in the specified file
''' </summary>
''' <param name="File"></param>
''' <param name="Range">The worksheet or excel range to query</param>
''' <returns></returns>
Public Shared Function Query(ByVal File As String, ByVal Range As String) As DataTable
Return New Import(File, Range).Query()
End Function
Public Shared Function _Select(ByVal File As String, ByVal Sql As String) As DataTable
Dim i As New Import(File)
i.SQL = Sql
Return i.Query()
End Function
#End Region

#Region "Constructors"
Public Sub New()
End Sub
Public Sub New(ByVal WorkBook As String)
MyBase.New(WorkBook)
End Sub
Public Sub New(ByVal WorkBook As String, ByVal Range As String)
Me.New(WorkBook)
Me.Range = Range
End Sub
#End Region

#Region "SQL Query"
Private m_fields As String = "*"
''' <summary>
''' The fields which should be returned (default all fields with data: "*")
''' </summary>
<DefaultValue("*")> _
Public Property Fields() As String
Get
Return m_fields
End Get
Set(ByVal value As String)
m_fields = value
End Set
End Property
Private Sub ResetFields()
m_fields = "*"
End Sub
Private m_where As String
''' <summary>
''' An optional where clause. Works pretty much the same as 'normal' SQL. (Default=null)
''' </summary>
<DefaultValue("Nothing")> _
Public Property Where() As String
Get
Return m_where
End Get
Set(ByVal value As String)
m_where = value
End Set
End Property

Public SQL As String
Protected Function GetSelectSQL() As String
If SQL IsNot Nothing Then
Return SQL
End If
' if no sql was provided, construct from worksheet and where
Dim sql__1 As String = String.Format("select {0} from {1}", m_fields, GetRange())
If m_where IsNot Nothing Then
sql__1 += " WHERE " & m_where
End If
Return sql__1
End Function
''' <summary>
''' Performs the query with the specifed settings
''' </summary>
''' <returns></returns>
Public Function Query() As DataTable
Return Query(DirectCast(Nothing, DataTable))
End Function

Public Function Query(ByVal dt As DataTable) As DataTable
CheckWorkbook()
Try
OpenConnection(True)
If dt Is Nothing Then
dt = New DataTable()
End If

Dim da As New OleDbDataAdapter(New OleDbCommand(GetSelectSQL(), Connection))
da.Fill(dt)
Return dt
Finally
CloseConnection(True)
End Try
End Function

Public Sub Fill(ByVal dt As DataTable)
Query(dt)
End Sub
#End Region


End Class
End Namespace
Cách Sử dụng:

View Code:
Dim dt As DataTable
Dim i As New Vtrung.Import("C:\DM_LO_VUA.xls")
i.Where = "HeaderA=22"
dt = i.Query()
hoặc
View Code:
Dim dt As DataTable = Vtrung.Import.Query("C:\DM_LO_VUA.xls", "a2:d4")
hoặc
View Code:
Dim dt As DataTable = Vtrung.Import.Query("C:\DM_LO_VUA.xls")
Bạn có thể Download mã nguồn tại đây:
View Code:
Bookmark and Share

0 comments:

Post a Comment

Next previous home

Cộng đồng yêu thiết kế Việt Nam Thiet ke website, danang