Quantcast
Channel: VBForums - Visual Basic .NET
Viewing all articles
Browse latest Browse all 27422

VS 2013 Web browser loaded status fired multiple times

$
0
0
Hello,

I am trying to use the HTML Agility Pack to scrape data from a website after it has completely loaded in the WebBrowser control.
The problem I am facing is that the document-loaded event fires multiple times, and my code is triggered the first time it fires, but the document only finishes loading much later.


This is the code I use on my button to start everything:

Code:

''' <summary>
''' Click handler for Button1: points WebBrowser1 at the eToro trader-insight widget page.
''' </summary>
Private Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
    Dim widgetAddress As String = "https://pages.etoro.com/widgets/trader-insight/"
    WebBrowser1.Navigate(widgetAddress)
End Sub


This is the code I use to scrape the data:

Code:

    ' Flag set to True by PageWaiter_toro once WebBrowser1 reports ReadyState = Complete;
    ' WaitForPageLoad_toro spins on it and resets it to False afterwards.
    Private Property pageready As Boolean = False
' Table holding the scraped rows (columns "Feed", "Long", "Short"); bound to torodata as its DataSource.
Private dt_toro As DataTable


Code:

Imports System.IO
Imports HtmlAgilityPack
Imports System.Net
  ' Loads a page in WebBrowser1, waits for it to finish, then scrapes the EURUSD long/short
  ' values out of the rendered DOM with HtmlAgilityPack and shows them in torodata.
  ' NOTE(review): the snippet as posted has no closing End Sub, and `url` is not declared
  ' anywhere in the visible code — confirm both against the full source.
  Private Sub LoadHtmlWithBrowser_toro()

       

                ' Reset the grid: unbind it first, then drop any rows and columns left over from a previous run.
                torodata.DataSource = Nothing
                torodata.Rows.Clear()
                torodata.Columns.Clear()

                ' Build a fresh three-column result table and bind it to the grid.
                dt_toro = New DataTable
                dt_toro.Columns.Add("Feed")
                dt_toro.Columns.Add("Long")
                dt_toro.Columns.Add("Short")

                torodata.DataSource = dt_toro


                ' Start the navigation and block (while pumping messages) until PageWaiter_toro sets pageready.
                WebBrowser1.ScriptErrorsSuppressed = True
                WebBrowser1.Navigate(url)
                WaitForPageLoad_toro()
               

                ' Snapshot the browser's live DOM as HTML text and parse it with HtmlAgilityPack.
                Dim doc As New HtmlAgilityPack.HtmlDocument()
                Dim documentAsIHtmlDocument3 = DirectCast(WebBrowser1.Document.DomDocument, mshtml.IHTMLDocument3)
                Dim sr As New StringReader(documentAsIHtmlDocument3.documentElement.outerHTML)
                doc.Load(sr)

                ' toro Data EURUSD
                Dim fx_toro_longdata_eurusd As String
                Dim fx_toro_shortdata_eurusd As String
                ''  Dim torodoc As New HtmlAgilityPack.HtmlDocument()

                ' Absolute XPaths into the widget markup; they depend on the exact page layout.
                ' NOTE(review): SelectSingleNode presumably returns Nothing when the path does not match
                ' (e.g. content not rendered yet), in which case the InnerText reads below would throw — confirm.
                Dim fx_toro_long_eurusd As HtmlNode = doc.DocumentNode.SelectSingleNode("/html/body/div/div[2]/div[3]/div[1]/div[1]/div[2]")
                Dim fx_toro_short_eurusd As HtmlNode = doc.DocumentNode.SelectSingleNode("/html/body/div/div[2]/div[3]/div[1]/div[1]/div[3]")
                fx_toro_longdata_eurusd = (fx_toro_long_eurusd.InnerText)
                fx_toro_shortdata_eurusd = (fx_toro_short_eurusd.InnerText)

                ' Append one row: feed name, long text, short text; then re-bind the grid to the table.
                Dim dt_toro_EURUSD As DataRow = dt_toro.NewRow()
                dt_toro_EURUSD(0) = "EURUSD"
                dt_toro_EURUSD(1) = fx_toro_longdata_eurusd
                dt_toro_EURUSD(2) = fx_toro_shortdata_eurusd
                dt_toro.Rows.Add(dt_toro_EURUSD)
                torodata.DataSource = dt_toro

This is the code I use to wait for the web browser to complete its task:

Code:

''' <summary>
''' Subscribes PageWaiter_toro to WebBrowser1.DocumentCompleted and pumps the message
''' queue until that handler sets pageready, then clears the flag for the next call.
''' </summary>
Private Sub WaitForPageLoad_toro()
    AddHandler WebBrowser1.DocumentCompleted, AddressOf PageWaiter_toro

    ' Keep the UI responsive (and let DocumentCompleted be delivered) while waiting.
    Do Until pageready
        Application.DoEvents()
    Loop

    pageready = False
End Sub

Code:

''' <summary>
''' DocumentCompleted handler: once WebBrowser1 reports ReadyState = Complete, it
''' unsubscribes itself and sets pageready so WaitForPageLoad_toro can stop waiting.
''' </summary>
Private Sub PageWaiter_toro(ByVal sender As Object, ByVal e As WebBrowserDocumentCompletedEventArgs)
    ' Completions raised before the browser reaches the Complete state are ignored.
    If WebBrowser1.ReadyState <> WebBrowserReadyState.Complete Then
        Return
    End If

    RemoveHandler WebBrowser1.DocumentCompleted, AddressOf PageWaiter_toro
    pageready = True
End Sub


The problem is that my scraping code fires before the data is completely loaded. Apparently the page I am trying to scrape has some JavaScript to load, and that is causing the issue.

Viewing all articles
Browse latest Browse all 27422

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>