root/branches/0.3/src/plugins/amazon.py

Revision 558, 18.2 kB (checked in by nicfit, 2 years ago)

Album cover search/download is working, more work needed

Line 
1 """Python wrapper
2
3
4 for Amazon web APIs
5
6 This module allows you to access Amazon's web APIs,
7 to do things like search Amazon and get the results programmatically.
8 Described here:
9   http://www.amazon.com/webservices
10
11 You need an Amazon-provided license key to use these services.
12 Follow the link above to get one.  These functions will look in
13 several places (in this order) for the license key:
14 - the "license_key" argument of each function
15 - the module-level LICENSE_KEY variable (call setLicense once to set it)
16 - an environment variable called AMAZON_LICENSE_KEY
17 - a file called ".amazonkey" in the current directory
18 - a file called "amazonkey.txt" in the current directory
19 - a file called ".amazonkey" in your home directory
20 - a file called "amazonkey.txt" in your home directory
21 - a file called ".amazonkey" in the same directory as amazon.py
22 - a file called "amazonkey.txt" in the same directory as amazon.py
23
24 Sample usage:
25 >>> import amazon
26 >>> amazon.setLicense('...') # must get your own key!
27 >>> pythonBooks = amazon.searchByKeyword('Python')
28 >>> pythonBooks[0].ProductName
29 u'Learning Python (Help for Programmers)'
30 >>> pythonBooks[0].URL
31 ...
32 >>> pythonBooks[0].OurPrice
33 ...
34
35 Other available functions:
36 - browseBestSellers
37 - searchByASIN
38 - searchByUPC
39 - searchByAuthor
40 - searchByArtist
41 - searchByActor
42 - searchByDirector
43 - searchByManufacturer
44 - searchByListMania
45 - searchSimilar
46 - searchByWishlist
47
48 Other usage notes:
49 - Most functions can take product_line as well, see source for possible values
50 - All functions can take type="lite" to get less detail in results
51 - All functions can take page=N to get second, third, fourth page of results
52 - All functions can take license_key="XYZ", instead of setting it globally
53 - All functions can take http_proxy="http://x/y/z" which overrides your system setting
54 """
55
# Module metadata.
__author__ = "Mark Pilgrim (f8dy@diveintomark.org)"
__version__ = "0.64.1"
# The two values below are CVS keyword expansions; the slices drop the
# leading "$Revision: " / "$Date: " marker and the trailing " $".
__cvsversion__ = "$Revision: 1.12 $"[11:-2]
__date__ = "$Date: 2004/07/02 13:24:09 $"[7:-2]
__copyright__ = "Copyright (c) 2002 Mark Pilgrim"
__license__ = "Python"
62 # Powersearch and return object type fix by Joseph Reagle <geek@goatee.net>
63
64 # Locale support by Michael Josephson <mike@josephson.org>
65
66 # Modification to _contentsOf to strip trailing whitespace when loading Amazon key
67 # from a file submitted by Patrick Phalen.
68
69 # Support for specifying locale and associates ID as search parameters and
70 # internationalisation fix for the SalesRank integer conversion by
71 # Christian Theune <ct@gocept.com>, gocept gmbh & co. kg
72
73 # Support for BlendedSearch contributed by Alex Choo
74
75 from xml.dom import minidom
76 import os, sys, getopt, cgi, urllib, string
77 try:
78     import timeoutsocket # http://www.timo-tasi.org/python/timeoutsocket.py
79     timeoutsocket.setDefaultSocketTimeout(10)
80 except ImportError:
81     pass
82
# Module-wide defaults.  Each one has a set*/get* function pair below; the
# getters return these values when the caller passes nothing of their own.
LICENSE_KEY = None            # changed via setLicense()
HTTP_PROXY = None             # changed via setProxy()
ASSOCIATE = "webservices-20"  # changed via setAssociate()
LOCALE = "us"                 # changed via setLocale()
87
88 # don't touch the rest of these constants
class AmazonError(Exception):
    """Error raised by this module, e.g. for an unsupported locale or an ErrorMsg in a response."""
class NoLicenseKey(Exception):
    """Raised by getLicense() when no license key is found in any searched location."""
_amazonfile1 = ".amazonkey"
_amazonfile2 = "amazonkey.txt"

# Ordered (lookup, description) pairs consulted by getLicense().  Every
# lookup receives the caller-supplied key and returns a key or a false value;
# the first true result wins, so the order here is the search order.
_licenseLocations = (
    (lambda key: key,
     'passed to the function in license_key variable'),
    (lambda key: LICENSE_KEY,
     'module-level LICENSE_KEY variable (call setLicense to set it)'),
    (lambda key: os.environ.get('AMAZON_LICENSE_KEY'),
     'an environment variable called AMAZON_LICENSE_KEY'),
    (lambda key: _contentsOf(os.getcwd(), _amazonfile1),
     _amazonfile1 + ' in the current directory'),
    (lambda key: _contentsOf(os.getcwd(), _amazonfile2),
     _amazonfile2 + ' in the current directory'),
    (lambda key: _contentsOf(os.environ.get('HOME', ''), _amazonfile1),
     _amazonfile1 + ' in your home directory'),
    (lambda key: _contentsOf(os.environ.get('HOME', ''), _amazonfile2),
     _amazonfile2 + ' in your home directory'),
    (lambda key: _contentsOf(_getScriptDir(), _amazonfile1),
     _amazonfile1 + ' in the amazon.py directory'),
    (lambda key: _contentsOf(_getScriptDir(), _amazonfile2),
     _amazonfile2 + ' in the amazon.py directory'),
)
# locale name -> (value sent as the "locale" query parameter, or None to omit
# it; host name the request is sent to).  Read by buildURL().
_supportedLocales = dict(
    us=(None, "xml.amazon.com"),
    uk=("uk", "xml-eu.amazon.com"),
    de=("de", "xml-eu.amazon.com"),
    jp=("jp", "xml.amazon.co.jp"),
)
110
111 ## administrative functions
def version():
    """Print the module's version, copyright and release date to stdout."""
    banner = ("PyAmazon %(__version__)s\n"
              "%(__copyright__)s\n"
              "released %(__date__)s\n") % globals()
    # A single parenthesised argument prints the same text whether print is
    # a statement or a function.
    print(banner)
117
def setAssociate(associate):
    """Store associate as the module-level default ASSOCIATE."""
    global ASSOCIATE
    ASSOCIATE = associate
121
def getAssociate(override=None):
    """Return override when it is true, otherwise the module-level ASSOCIATE."""
    if override:
        return override
    return ASSOCIATE
124
125 ## utility functions
126
def _checkLocaleSupported(locale):
    """Raise AmazonError unless locale is a key of _supportedLocales.

    The error message lists the supported locale names.
    """
    # "in", str.join and the call form of raise replace dict.has_key,
    # string.join and "raise E, value", which are deprecated and no longer
    # exist in Python 3; the behaviour is the same.
    if locale not in _supportedLocales:
        raise AmazonError("Unsupported locale. Locale must be one of: %s" %
                          ", ".join(_supportedLocales))
131
def setLocale(locale):
    """Validate locale, then store it as the module-level default LOCALE.

    _checkLocaleSupported() raises AmazonError for an unknown locale, in
    which case LOCALE is left unchanged.
    """
    _checkLocaleSupported(locale)
    global LOCALE
    LOCALE = locale
137
def getLocale(locale=None):
    """Return locale when it is true, otherwise the module-level LOCALE."""
    if locale:
        return locale
    return LOCALE
141
def setLicense(license_key):
    """Store license_key as the module-level default LICENSE_KEY."""
    global LICENSE_KEY
    LICENSE_KEY = license_key
146
def getLicense(license_key = None):
    """Return the first license key found.

    Each lookup in _licenseLocations is tried in order with license_key as
    its argument; the first true result is returned.  See the module docs
    for the search order.

    Raises NoLicenseKey when every lookup comes back empty.
    """
    for lookup, _description in _licenseLocations:
        found = lookup(license_key)
        if found:
            return found
    raise NoLicenseKey('get a license key at http://www.amazon.com/webservices')
156
def setProxy(http_proxy):
    """Store http_proxy as the module-level default HTTP_PROXY."""
    global HTTP_PROXY
    HTTP_PROXY = http_proxy
161
def getProxy(http_proxy = None):
    """Return http_proxy when it is true, otherwise the module-level HTTP_PROXY."""
    if http_proxy:
        return http_proxy
    return HTTP_PROXY
165
def getProxies(http_proxy = None):
    """Return a {"http": proxy} mapping, or None when no proxy is configured.

    The proxy is resolved through getProxy(), so the argument takes
    precedence over the module-level setting.
    """
    effective = getProxy(http_proxy)
    if not effective:
        return None
    return {"http": effective}
173
def _contentsOf(dirname, filename):
    """Return the stripped contents of dirname/filename, or None.

    None is returned when the path is not an existing regular file.  Using
    isfile() rather than exists() means a directory that happens to carry
    the key file's name is skipped instead of making open() fail.
    """
    path = os.path.join(dirname, filename)
    if not os.path.isfile(path):
        return None
    fsock = open(path)
    try:
        # strip() removes the trailing newline editors leave after the key
        return fsock.read().strip()
    finally:
        # close the handle even when read() raises
        fsock.close()
181
def _getScriptDir():
    """Return the absolute directory of this module's file.

    When the module runs as a script the location is taken from
    sys.argv[0]; otherwise from the module's __file__.
    """
    if __name__ != '__main__':
        anchor = sys.modules[__name__].__file__
    else:
        anchor = sys.argv[0]
    return os.path.abspath(os.path.dirname(anchor))
187
class Bag:
    """Plain attribute container; unmarshal() sets one attribute per child tag."""


def unmarshal(element):
    """Convert a minidom node into nested Bag objects and strings.

    - A node without child elements becomes the concatenation of its text
      nodes.  For a SalesRank element, '.' and ',' separators are removed
      and the result is converted to int.
    - A node with child elements becomes a Bag with one attribute per child
      tag name.  A tag that occurs more than once is collected into a list.
    - Details children are always stored as a list, even when there is only
      one, so a query with a single result still yields a list of Details.
    - A Details element also gets a URL attribute taken from its "url"
      XML attribute.
    """
    rc = Bag()
    if isinstance(element, minidom.Element) and element.tagName == 'Details':
        rc.URL = element.attributes["url"].value

    childElements = [e for e in element.childNodes
                     if isinstance(e, minidom.Element)]
    if not childElements:
        # Leaf node: the value is its text content.
        text = "".join([e.data for e in element.childNodes
                        if isinstance(e, minidom.Text)])
        if element.tagName == 'SalesRank':
            # Drop thousands separators (either '.' or ',' depending on the
            # locale) before the integer conversion.
            text = int(text.replace('.', '').replace(',', ''))
        return text

    for child in childElements:
        key = child.tagName
        value = unmarshal(child)
        if hasattr(rc, key):
            # Repeated tag: promote the existing value to a list on the
            # second occurrence, then keep appending.
            existing = getattr(rc, key)
            if not isinstance(existing, list):
                existing = [existing]
                setattr(rc, key, existing)
            existing.append(value)
        elif key == 'Details':
            # Make the first Details element a list right away; otherwise a
            # single result would not come back as a list.
            setattr(rc, key, [value])
        else:
            setattr(rc, key, value)
    return rc
219
def _quoteParam(value):
    """Return value percent-encoded for use in a query string.

    Text that is not a plain str (Python 2 unicode) is encoded as UTF-8
    first, because urllib.quote raises KeyError on non-ASCII unicode input.
    Other objects (e.g. integer IDs) are converted with str().
    """
    if not isinstance(value, str):
        if hasattr(value, "encode"):
            value = value.encode("utf-8")
        else:
            value = str(value)
    return urllib.quote(value)


def buildURL(search_type, keyword, product_line, type, page, license_key, locale, associate):
    """Build the request URL for one search.

    Parameters:
    search_type - name of the query parameter that carries keyword
    keyword - search term; percent-encoded
    product_line - sent as "mode" when true
    type - sent as "type"
    page - sent as "page" when true
    license_key - sent as "dev-t" with surrounding whitespace stripped
    locale - key of _supportedLocales; selects the host and, where defined,
        the "locale" parameter
    associate - sent as "t"

    Raises AmazonError (via _checkLocaleSupported) for an unknown locale.

    Every caller-supplied value is percent-encoded, not only the keyword, so
    a stray '&' or space in one of them cannot corrupt the query string.
    """
    _checkLocaleSupported(locale)
    localeParam, host = _supportedLocales[locale]
    parts = ["http://" + host + "/onca/xml3?f=xml",
             "t=%s" % _quoteParam(associate),
             "dev-t=%s" % _quoteParam(license_key.strip()),
             "type=%s" % _quoteParam(type)]
    if localeParam:
        parts.append("locale=%s" % localeParam)
    if page:
        parts.append("page=%s" % page)
    if product_line:
        parts.append("mode=%s" % _quoteParam(product_line))
    parts.append("%s=%s" % (search_type, _quoteParam(keyword)))
    return "&".join(parts)
234
235
236 ## main functions
237
238
def search(search_type, keyword, product_line, type = "heavy", page = None,
           license_key=None, http_proxy = None, locale = None, associate = None):
    """search Amazon

    You need a license key to call this function; see
    http://www.amazon.com/webservices
    to get one.  Then you can either pass it to
    this function every time, or set it globally; see the module docs for details.

    Parameters:
    keyword - keyword to search
    search_type - in (KeywordSearch, BrowseNodeSearch, AsinSearch, UpcSearch, AuthorSearch, ArtistSearch, ActorSearch, DirectorSearch, ManufacturerSearch, ListManiaSearch, SimilaritySearch)
    product_line - type of product to search for.  restrictions based on search_type
        UpcSearch - in (music, classical)
        AuthorSearch - must be "books"
        ArtistSearch - in (music, classical)
        ActorSearch - in (dvd, vhs, video)
        DirectorSearch - in (dvd, vhs, video)
        ManufacturerSearch - in (electronics, kitchen, videogames, software, photo, pc-hardware)
    type (optional) - level of detail, "heavy" (default) or "lite"
    page (optional) - number of the result page to request
    license_key (optional) - overrides the globally configured key
    http_proxy (optional) - address of HTTP proxy to use for the request
    locale (optional) - overrides the globally configured locale
    associate (optional) - overrides the globally configured associate ID

    Returns: list of Bags, each Bag may contain the following attributes:
      Asin - Amazon ID ("ASIN" number) of this item
      Authors - list of authors
      Availability - "available", etc.
      BrowseList - list of related categories
      Catalog - catalog type ("Book", etc)
      CollectiblePrice - ?, format "$34.95"
      ImageUrlLarge - URL of large image of this item
      ImageUrlMedium - URL of medium image of this item
      ImageUrlSmall - URL of small image of this item
      Isbn - ISBN number
      ListPrice - list price, format "$34.95"
      Lists - list of ListMania lists that include this item
      Manufacturer - manufacturer
      Media - media ("Paperback", "Audio CD", etc)
      NumMedia - number of different media types in which this item is available
      OurPrice - Amazon price, format "$24.47"
      ProductName - name of this item
      ReleaseDate - release date, format "09 April, 1999"
      Reviews - reviews (AvgCustomerRating, plus list of CustomerReview with Rating, Summary, Content)
      SalesRank - sales rank (integer)
      SimilarProducts - list of Product, which is ASIN number
      ThirdPartyNewPrice - ?, format "$34.95"
      URL - URL of this item
    For search_type "BlendedSearch" the unmarshalled BlendedSearch node is
    returned instead (a list of ProductLine containing a list of
    ProductInfo containing a list of Details).

    Raises:
      NoLicenseKey - no license key could be found (see getLicense)
      AmazonError - unsupported locale, or the response carries an ErrorMsg
    """
    license_key = getLicense(license_key)
    locale = getLocale(locale)
    associate = getAssociate(associate)
    url = buildURL(search_type, keyword, product_line, type, page,
            license_key, locale, associate)
    opener = urllib.FancyURLopener(getProxies(http_proxy))
    usock = opener.open(url)
    try:
        xmldoc = minidom.parse(usock)
    finally:
        # release the connection even when the response is not parseable XML
        usock.close()

    blended = (search_type == "BlendedSearch")
    root = unmarshal(xmldoc)
    if blended:
        data = root.BlendedSearch
    else:
        data = root.ProductInfo

    if hasattr(data, 'ErrorMsg'):
        raise AmazonError(data.ErrorMsg)
    if blended:
        # a list of ProductLine containing a list of ProductInfo
        # containing a list of Details.
        return data
    return data.Details
313
def searchByKeyword(keyword, product_line="books", type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "KeywordSearch" for keyword; see search() for parameters and result."""
    return search("KeywordSearch", keyword, product_line,
                  type=type, page=page, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
316
def browseBestSellers(browse_node, product_line="books", type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "BrowseNodeSearch" for browse_node; see search() for parameters and result."""
    return search("BrowseNodeSearch", browse_node, product_line,
                  type=type, page=page, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
319
def searchByASIN(ASIN, type="heavy", license_key=None, http_proxy=None, locale=None, associate=None):
    """Run an "AsinSearch" for ASIN, with no product line and no page; see search()."""
    return search("AsinSearch", ASIN, None,
                  type=type, page=None, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
322
def searchByUPC(UPC, type="heavy", license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "UpcSearch" for UPC, with no product line and no page; see search()."""
    return search("UpcSearch", UPC, None,
                  type=type, page=None, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
325
def searchByAuthor(author, type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run an "AuthorSearch" for author in the "books" product line; see search()."""
    return search("AuthorSearch", author, "books",
                  type=type, page=page, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
328
def searchByArtist(artist, product_line="music", type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run an "ArtistSearch" for artist; see search().

    Raises AmazonError unless product_line is "music" or "classical".
    """
    if product_line in ("music", "classical"):
        return search("ArtistSearch", artist, product_line,
                      type=type, page=page, license_key=license_key,
                      http_proxy=http_proxy, locale=locale, associate=associate)
    raise AmazonError("product_line must be in ('music', 'classical')")
333
def searchByActor(actor, product_line="dvd", type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run an "ActorSearch" for actor; see search().

    Raises AmazonError unless product_line is "dvd", "vhs" or "video".
    """
    if product_line in ("dvd", "vhs", "video"):
        return search("ActorSearch", actor, product_line,
                      type=type, page=page, license_key=license_key,
                      http_proxy=http_proxy, locale=locale, associate=associate)
    raise AmazonError("product_line must be in ('dvd', 'vhs', 'video')")
338
def searchByDirector(director, product_line="dvd", type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "DirectorSearch" for director; see search().

    Raises AmazonError unless product_line is "dvd", "vhs" or "video".
    """
    if product_line in ("dvd", "vhs", "video"):
        return search("DirectorSearch", director, product_line,
                      type=type, page=page, license_key=license_key,
                      http_proxy=http_proxy, locale=locale, associate=associate)
    raise AmazonError("product_line must be in ('dvd', 'vhs', 'video')")
343
def searchByManufacturer(manufacturer, product_line="pc-hardware", type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "ManufacturerSearch" for manufacturer; see search().

    Raises AmazonError unless product_line is one of "electronics",
    "kitchen", "videogames", "software", "photo" or "pc-hardware".
    """
    allowed = ("electronics", "kitchen", "videogames", "software", "photo", "pc-hardware")
    if product_line in allowed:
        return search("ManufacturerSearch", manufacturer, product_line,
                      type=type, page=page, license_key=license_key,
                      http_proxy=http_proxy, locale=locale, associate=associate)
    raise AmazonError("product_line must be in ('electronics', 'kitchen', 'videogames', 'software', 'photo', 'pc-hardware')")
348
def searchByListMania(listManiaID, type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "ListManiaSearch" for listManiaID, with no product line; see search()."""
    return search("ListManiaSearch", listManiaID, None,
                  type=type, page=page, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
351
def searchSimilar(ASIN, type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "SimilaritySearch" for ASIN, with no product line; see search()."""
    return search("SimilaritySearch", ASIN, None,
                  type=type, page=page, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
354
def searchByWishlist(wishlistID, type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "WishlistSearch" for wishlistID, with no product line; see search()."""
    return search("WishlistSearch", wishlistID, None,
                  type=type, page=page, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
357
def searchByPower(keyword, product_line="books", type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "PowerSearch" for keyword; see search() for parameters and result.

    Examples:
    >>> RecentKing = amazon.searchByPower('author:Stephen King and pubdate:2003')
    >>> SnowCrash = amazon.searchByPower('title:Snow Crash')
    """
    return search("PowerSearch", keyword, product_line,
                  type=type, page=page, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
362
def searchByBlended(keyword, type="heavy", page=1, license_key=None, http_proxy=None, locale=None, associate=None):
    """Run a "BlendedSearch" for keyword, with no product line; see search().

    For this search type search() returns the unmarshalled BlendedSearch
    node rather than a list of Details.
    """
    return search("BlendedSearch", keyword, None,
                  type=type, page=page, license_key=license_key,
                  http_proxy=http_proxy, locale=locale, associate=associate)
365
class OnDemandAmazonList:
    # Copyright (C) 2005  Leonard Richardson [OnDemandAmazonList]
    """A virtual list used to hide the paged implementation of the
    Amazon Web Services. AWS queries return one page at a time, but
    you can iterate over this list as though you had the whole
    thing. It will silently make calls to Amazon when you spill over
    from one page to the next."""

    def __init__(self, function, kwds):
        """Pass in the PyAmazon function you want to call, with a map
        of the arguments you'd give it to get the first page of
        results for your query. For instance:
        OnDemandAmazonList(amazon.searchByAuthor, {'author' : 'James Joyce'})"""

        self.function = function
        self.kwds = kwds

    def __iter__(self):
        """Returns an iterator that knows how to traverse the list
        by making multiple AWS requests."""
        return self.PageBasedIterator(self)

    class PageBasedIterator:
        """Iterator that walks the result pages of an OnDemandAmazonList.

        It works on a copy of the list's keyword arguments, so setting the
        'page' argument never modifies the dictionary the caller passed in.
        """

        def __init__(self, l):
            import copy
            self.definition = l
            self.kwds = copy.copy(self.definition.kwds)
            self.thisPageNumber = 0      # number of the page last requested
            self.thisPage = None         # items of the current page
            self.thisPageCounter = None  # index of the next item to return
            self.finished = False        # True once the end has been reached

        def __iter__(self):
            "The __iter__ of an iterator object is itself."
            return self

        def next(self):
            """Returns the next item in the current page, or (if we've
            reached the end of the current page) retrieves the next
            page from Amazon and starts on it. If there is no next
            page or it's the same as the current page, the iteration
            stops."""
            if self.finished:
                # keep raising once exhausted instead of re-querying
                raise StopIteration
            # thisPageCounter is the index of the NEXT item to hand out, so
            # the page is used up only when it reaches len(thisPage).  The
            # earlier test (counter + 1 == len) fetched one item too early
            # and dropped the last item of every page.
            if not self.thisPage or self.thisPageCounter >= len(self.thisPage):
                if not self._loadNextPage():
                    self.finished = True
                    raise StopIteration
            value = self.thisPage[self.thisPageCounter]
            self.thisPageCounter += 1
            return value

        # Python 3 spelling of the iterator protocol method.
        __next__ = next

        def _loadNextPage(self):
            """Request the following page; return False when there is none.

            The end of the list is recognised by the "no exact matches"
            AmazonError, by an empty page, or by a page that starts with the
            same Asin as the previous one. Any other AmazonError propagates.
            """
            import sys
            self.thisPageNumber += 1
            self.kwds['page'] = self.thisPageNumber
            oldPage = self.thisPage
            try:
                newPage = self.definition.function(**self.kwds)
            except AmazonError:
                # sys.exc_info() works on every interpreter version,
                # including those without "except ... as".
                e = sys.exc_info()[1]
                if str(e) == "There are no exact matches for the search.":
                    #We tried to go a page off the list. That means
                    #we've gotten all the results in the list.
                    return False
                #It's some other error: propagate it upwards, keeping the
                #original exception object and traceback.
                raise
            if not newPage:
                # an empty page has no first item to compare; treat as the end
                return False
            if oldPage and oldPage[0].Asin == newPage[0].Asin:
                #Amazon gave us the same page twice; that means we've
                #iterated over the end of the list and Amazon isn't
                #telling us.
                return False
            self.thisPage = newPage
            self.thisPageCounter = 0
            return True
Note: See TracBrowser for help on using the browser.