1

I am using below code to extract data from Amazon.

Sub Macro1()
    ' Macro1 Macro
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;http://www.amazon.com/gp/offer-listing/B00N41UTWG/ref=olp_f_new?ie=UTF8&f_new=true" _
        , Destination:=Range("$A$1"))
        .Name = "oldOfferPrice" _
        ' "its_details_value_node.html?nsc=true&listId=www_s201_b9233&tsId=BBK01.ED0439"
        .FieldNames = True
        .RowNumbers = True
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlEntirePage
        .WebFormatting = xlWebFormattingNone
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = True
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False

    End With
End Sub

Above code is extracting complete page data but my requirement is to extract only prices. Page prices are in this format.

<div class="a-row a-spacing-mini olpOffer">
                    <div class="a-column a-span2">
       <span class="a-size-large a-color-price olpOfferPrice a-text-bold">                $171.99                </span>
<span class="a-color-price">
<span class="supersaver"><i class="a-icon a-icon-prime" aria-label="Amazon Prime TM"><span class="a-icon-alt">Amazon Prime TM</span></i></span>
</span>

I want to extract two values i.e $171.99 and Amazon Prime TM. There may be multiple price and seller values in one page and I want to extract all.

Community
  • 1
  • 1

1 Answers1

1

Here is an example showing how you can retrieve Amazon offers for certain ASIN using XHR and Split, and output results to the sheet:

Sub TestExtractAmazonOffers()

    Dim arrList() As Variant

    ' clear sheet
    Sheets("Sheet1").Cells.Delete
    ' retrieve offers for certain ASIN
    arrList = ExtractAmazonOffers("B07CR8D2DW")
    ' output data
    Output Sheets("Sheet1"), 1, 1, arrList

End Sub

Function ExtractAmazonOffers(strASIN As String)

    Dim strUrl As String
    Dim arrTmp() As String
    Dim strTmp As String
    Dim arrItems() As String
    Dim i As Long
    Dim arrCols() As String
    Dim strSellerName As String
    Dim strOfferPrice As String
    Dim strAmazonPrime As String
    Dim strShippingPrice As String
    Dim arrResults() As Variant
    Dim arrCells() As Variant

    ' init
    arrResults = Array(Array("Offer Price", "Amazon Prime TM", "Shipping Price", "Seller Name"))
    strUrl = "https://www.amazon.com/gp/offer-listing/" & strASIN & "/ref=olp_f_new?ie=UTF8&f_new=true"
    Do
        ' http get request of the search result page
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", strUrl, False
            .Send
            strResp = .ResponseText
        End With
        arrTmp = Split(strResp, "id=""olpOfferList""", 2)
        If UBound(arrTmp) = 1 Then
            arrItems = Split(arrTmp(1), "<div class=""a-row a-spacing-mini olpOffer""")
            For i = 1 To UBound(arrItems)
                ' get item columns
                arrCols = Split(arrItems(i), "<div class=""a-column", 6)
                ' retrieve seller name from column 4
                strTmp = Split(arrCols(4), "olpSellerName", 2)(1)
                arrTmp = Split(strTmp, "alt=""", 2)
                If UBound(arrTmp) = 1 Then ' from image alt
                    strTmp = Split(arrTmp(1), """", 2)(0)
                    strSellerName = Trim(strTmp)
                Else ' from link
                    strTmp = Split(strTmp, "<a", 2)(1)
                    strTmp = Split(strTmp, ">", 2)(1)
                    strTmp = Split(strTmp, "<", 2)(0)
                    strSellerName = Trim(strTmp)
                End If
                ' retrieve offer price from column 1
                strTmp = Split(arrCols(1), "olpOfferPrice", 2)(1)
                strTmp = Split(strTmp, ">", 2)(1)
                strTmp = Split(strTmp, "<", 2)(0)
                strOfferPrice = Trim(strTmp)
                ' retrieve amazon prime
                arrTmp = Split(arrCols(1), "olpShippingInfo", 2)
                strAmazonPrime = IIf(InStr(arrTmp(0), "Amazon Prime") > 0, "Amazon Prime", "-")
                ' retrieve shipping info
                arrTmp = Split(arrTmp(1), "olpShippingPrice", 2)
                If UBound(arrTmp) = 1 Then
                    strTmp = Split(arrTmp(1), ">", 2)(1)
                    strTmp = Split(strTmp, "<", 2)(0)
                    strShippingPrice = Trim(strTmp)
                Else
                    strShippingPrice = "Free"
                End If
                ' store data
                ReDim Preserve arrResults(UBound(arrResults) + 1)
                arrResults(UBound(arrResults)) = Array(strOfferPrice, strAmazonPrime, strShippingPrice, strSellerName)
            Next
        End If
        ' search for next page link
        arrTmp = Split(strResp, "class=""a-last""", 2)
        If UBound(arrTmp) = 0 Then Exit Do
        strTmp = Split(arrTmp(1), "href=""", 2)(1)
        strUrl = Split(strTmp, """", 2)(0)
        If Left(strUrl, 1) = "/" Then strUrl = "https://www.amazon.com" & strUrl
    Loop
    ' convert nested array to 2-dimensional array
    ReDim arrCells(UBound(arrResults), 3)
    For i = 0 To UBound(arrCells, 1)
        For j = 0 To UBound(arrCells, 2)
            arrCells(i, j) = arrResults(i)(j)
        Next
    Next
    ExtractAmazonOffers = arrCells

End Function

Sub Output(objSheet As Worksheet, lngTop As Long, lngLeft As Long, arrCells As Variant)

    With objSheet
        .Select
        With .Range(.Cells(lngTop, lngLeft), .Cells( _
                UBound(arrCells, 1) - LBound(arrCells, 1) + lngTop, _
                UBound(arrCells, 2) - LBound(arrCells, 2) + lngLeft))
            .NumberFormat = "@"
            .Value = arrCells
            .Columns.AutoFit
        End With
    End With

End Sub

The resulting sheet is as follows:

result

omegastripes
  • 11,971
  • 4
  • 40
  • 81