代码之家  ›  专栏  ›  技术社区  ›  M.Koch

PowerShell document.getElementById —— 无法提取到正确的信息

  •  0
  • M.Koch  · 技术社区  · 7 年前

    我有一个小powershell问题困扰了我很长一段时间了。

    我试图从一个RSS站点获取信息。我下载了XML并逐条遍历,只想从中提取一部分内容。我用的是 .document.getElementById().outerText

    这是我的代码:

    <#
    AUTHOR: KOCH,MICHAEL [GRE-IT]
    DESCRIPTION: RSS READER
    DATE: 28.06.17
    DATE LAST WRITTEN: 19.07.17
    LAST CHANGE: 
    #>
    # Overview: downloads an RSS feed to disk, reads config.txt line by line,
    # then for every feed item navigates an Internet Explorer COM instance to the
    # item's link, reads the outerText of the element with id "main" and mails it
    # when the link matches "Zigaretten" and the title matches $tag.
    
    # 1 enables the diagnostic Write-Output calls below.
    $debug = 1 #DEBUG
    
    # Mail settings passed to Send-MailMessage further down.
    $receiver="A@MailAdress.com" 
    $sender="A@MailAdress.com" 
    $smtp="A.SMTP.SERVER"
    $encoding = [System.Text.Encoding]::UTF8
    
    # $output and $output_edit_path are not referenced again in this script.
    $path_config = "C:\RSS\Zoll\config.txt"
    $output = "C:\RSS\Zoll\meldung.html"
    $output_edit_path = "C:\RSS\Zoll\meldung_edit.html"
    # $nmbr is the config line index, $count the number of lines processed.
    $nmbr=0
    $count=0
    
    # Save the feed to disk (abort on download failure), then parse the file as XML.
    Invoke-WebRequest -Uri 'http://www.zoll.de/SiteGlobals/Functions/RSSFeed/DE/RSSNewsfeed/RSSZollImFokus.xml' -OutFile C:\RSS\Zoll\meldungen.xml -ErrorAction Stop
    [xml]$content = Get-Content C:\RSS\Zoll\meldungen.xml
    $feed = $content.rss.channel
    $tag = @()
    
    if($lines=Get-Content $path_config | Measure-Object -Line) # gets the number of lines
    { 
        while($count -ne $lines.Lines)
        {
            # The config file is read again on every iteration and indexed by $nmbr.
            if($entrys=(Get-Content $path_config)[$nmbr]) # gets the entries from config.txt and goes through them line by line
            {
                # NOTE(review): ">>" is the append-redirection operator, so this line
                # tries to append $entrys to a file named by $tag[$nmbr]; it does not
                # store the entry in the $tag array - confirm the intent.
                $entrys >> $tag[$nmbr]
    
                if ($debug -eq 1)
                {
                    # NOTE(review): inside a double-quoted string only $tag and $nmbr
                    # are expanded; the brackets are printed literally, not indexed.
                    Write-Output "$tag[$nmbr]"
                    Write-Output "$entrys"
                    Write-Output "$count"
                }
            }
            $count++
            $nmbr++ # moves on to the next line
        }
    }
    
    # One Internet Explorer COM instance is reused for all feed items.
    $ie = New-Object -ComObject "InternetExplorer.Application"
    
    Foreach($msg in $feed.Item)
    {
        $link = ($msg.link)
        $subject = ($msg.title)
    
        $ie.navigate("$link")
    
        #$return = Invoke-WebRequest -Uri $link -OutFile "C:\RSS\Zoll\link.html"
    
        # NOTE(review): the document is read immediately after navigate(); nothing
        # here waits for the page to finish loading before the element is queried.
        $return = $ie.document
        $innertext = $return.documentElement.document.getElementById("main").outerText
    
        # The trailing "#.Replace(...)" appears to be commented out, leaving $body
        # equal to $innertext; the umlaut replacements below are disabled as well.
        $body = $innertext#.Replace('Ä', '&Auml;')
        <#
        $body = $innertext.Replace('ä', '&auml;')
        $body = $innertext.Replace('Ö', '&Ouml;')
        $body = $innertext.Replace('ö', '&ouml;') 
        $body = $innertext.Replace('Ü', '&Uuml;')
        $body = $innertext.Replace('ü', '&uuml;')
        $body = $innertext.Replace('ß', '&szlig;')
        #>
            if ($debug -eq 1)
            {
                Write-Output "Subject $subject"
                Write-Output "Tag $tag"
                Write-Output "Link $link"
                Write-Output $body
                #exit
            }
    
            if($link -match "Zigaretten") # searches the <link> for the string "Zigaretten" 
            {
               if($subject -match $tag) # searches for the tag specified in config.txt !!! only one argument per line !!!
               {
                   # NOTE(review): Send-MailMessage is documented as producing no
                   # output, so $mail presumably stays empty and the branch below
                   # would not run even when the mail was sent - confirm.
                   if($mail = Send-MailMessage -From "$sender" -To "$receiver" -Subject "Zoll Meldung: $subject" -Body "$body" -SmtpServer "$smtp" -BodyAsHtml -encoding $encoding)
                   {
                       if($debug -eq 1)
                       {
                           Write-Output "$tag"
                           Write-Output "Send. Tag = $tag"
                       }
                           Write-Output "Send."
                   }
               }
           }
            # This else pairs with the "Zigaretten" link check above.
            else
            {
             Write-Host "Empty."
            }
    }
    
    # Close IE, release the COM reference and drop the variable.
    $ie.Quit()
    [System.Runtime.Interopservices.Marshal]::ReleaseComObject($ie)
    Remove-Variable ie
    
    1 回复  |  直到 7 年前
        1
  •  1
  •   M.Koch    7 年前

    我添加了一个“忙碌则等待”(wait if busy)循环,以确保IE已经加载完整的HTML文档。这就是问题的解决方案!:)

    <#
    AUTHOR: KOCH,MICHAEL [GRE-IT]
    DESCRIPTION: RSS READER
    DATE: 28.06.17
    DATE LAST WRITTEN: 20.07.17
    LAST CHANGE: ADDED WAIT IF BUSY ! 

    Downloads the Zoll RSS feed, opens every item link in Internet Explorer,
    reads the text of the element with id "main" and mails that text when the
    link contains "Zigaretten" and the item title matches one of the tags
    listed in config.txt (one regular expression per line).
    #>

    $debug = 0 #DEBUG: 1 shows the IE window and prints per-item diagnostics

    # Mail settings.
    $receiver = "A@MailAdress.de"
    $sender = "A@MailAdress.de"
    $smtp = "A.SMTP.SERVER"
    $encoding = [System.Text.Encoding]::UTF8

    # Paths and feed location.
    $path_config = "C:\RSS\Zoll\config.txt"
    $path_feed = "C:\RSS\Zoll\meldungen.xml"
    $output = "C:\RSS\Zoll\meldung.html"
    $output_edit_path = "C:\RSS\Zoll\meldung_edit.html"
    $feed_uri = 'http://www.zoll.de/SiteGlobals/Functions/RSSFeed/DE/RSSNewsfeed/RSSZollImFokus.xml'

    # Upper bound for a single page load so one stuck page cannot hang the run.
    $load_timeout_seconds = 30

    Invoke-WebRequest -Uri $feed_uri -OutFile $path_feed -ErrorAction Stop

    # XmlDocument.Load honours the encoding declared in the XML prolog, whereas
    # casting Get-Content output to [xml] decodes the file with the console
    # default encoding first and can garble umlauts.
    $content = New-Object System.Xml.XmlDocument
    $content.Load($path_feed)
    $feed = $content.rss.channel

    # Read all non-empty config lines once. The previous "$entrys >> $tag[$nmbr]"
    # was a file redirection, so $tag always stayed empty.
    $tag = @(Get-Content $path_config | Where-Object { $_ })

    # Combine the tags into one alternation so a title matching ANY tag passes.
    # With no tags the pattern is empty and matches every title, which is what
    # the always-empty $tag did before.
    $tag_pattern = ($tag | ForEach-Object { "(?:$_)" }) -join '|'

    if ($debug -eq 1)
    {
        Write-Output "Tags: $($tag -join ', ')"
        Write-Output "Tag pattern: $tag_pattern"
    }

    $ie = New-Object -ComObject InternetExplorer.Application #creates new ComObject IE

    try
    {
        if ($debug -eq 1)
        {
            $ie.Visible = $true
        }

        foreach ($msg in $feed.item)
        {
            $link = $msg.link
            $subject = $msg.title

            $ie.Navigate("$link") #navigate with Internet Explorer to the website

            # Wait while IE is busy OR the document is not complete (ReadyState 4).
            # Using -and here would leave the loop as soon as either condition
            # cleared, i.e. possibly before the page has finished loading.
            $deadline = (Get-Date).AddSeconds($load_timeout_seconds)
            $loaded = $true
            while ($ie.Busy -or $ie.ReadyState -ne 4)
            {
                if ((Get-Date) -gt $deadline)
                {
                    $loaded = $false
                    break
                }
                Start-Sleep -Milliseconds 200
            }

            if (-not $loaded)
            {
                Write-Warning "Timed out loading $link - skipped."
                continue
            }

            # Text of the element with id "main"; stays $null when the page has
            # no such element.
            $innertext = $ie.Document.documentElement.document.IHTMLDocument3_getElementById("main").outerText
            $body = $innertext

            if ($debug -eq 1)
            {
                Write-Output "Subject $subject"
                Write-Output "Tag $tag"
                Write-Output "Link $link"
                Write-Output "INNERTEXT $innertext"
                Write-Output "BODY $body"
                #exit
            }

            if ($link -match "Zigaretten") #searches in the <link> for the string "Zigaretten"
            {
                if ($subject -match $tag_pattern) #title must match one of the tags from config.txt (one per line)
                {
                    if ([string]::IsNullOrEmpty($body))
                    {
                        Write-Warning "No text found in element 'main' of $link - mail not sent."
                    }
                    else
                    {
                        # Send-MailMessage emits no output, so success cannot be
                        # tested via its return value; turn failures into
                        # terminating errors and report them instead.
                        try
                        {
                            Send-MailMessage -From "$sender" -To "$receiver" -Subject "Zoll Meldung: $subject" -Body "$body" -SmtpServer "$smtp" -BodyAsHtml -Encoding $encoding -ErrorAction Stop
                            Write-Output "Send."
                        }
                        catch
                        {
                            Write-Warning "Sending mail for '$subject' failed: $($_.Exception.Message)"
                        }
                    }
                }
            }
            else
            {
                Write-Host "Empty."
            }
        }
    }
    finally
    {
        # Always quit IE and release the COM reference, even after an error;
        # otherwise iexplore.exe processes pile up.
        $ie.Quit()
        [System.Runtime.Interopservices.Marshal]::ReleaseComObject($ie) | Out-Null
        Remove-Variable ie
    }