Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/shell/google/google.py
diff options
context:
space:
mode:
Diffstat (limited to 'shell/google/google.py')
-rwxr-xr-xshell/google/google.py638
1 files changed, 0 insertions, 638 deletions
diff --git a/shell/google/google.py b/shell/google/google.py
deleted file mode 100755
index a20ba51..0000000
--- a/shell/google/google.py
+++ /dev/null
@@ -1,638 +0,0 @@
-"""
-Python wrapper for Google web APIs
-
-This module allows you to access Google's web APIs through SOAP,
-to do things like search Google and get the results programmatically.
-Described U{here <http://www.google.com/apis/>}
-
-You need a Google-provided license key to use these services.
-Follow the link above to get one. These functions will look in
-several places (in this order) for the license key:
-
- - the "license_key" argument of each function
- - the module-level LICENSE_KEY variable (call setLicense once to set it)
- - an environment variable called GOOGLE_LICENSE_KEY
- - a file called ".googlekey" in the current directory
- - a file called "googlekey.txt" in the current directory
- - a file called ".googlekey" in your home directory
- - a file called "googlekey.txt" in your home directory
- - a file called ".googlekey" in the same directory as google.py
- - a file called "googlekey.txt" in the same directory as google.py
-
-Sample usage::
-
- >>> import google
- >>> google.setLicense('...') # must get your own key!
- >>> data = google.doGoogleSearch('python')
- >>> data.meta.searchTime
- 0.043221000000000002
-
- >>> data.results[0].URL
- 'http://www.python.org/'
-
- >>> data.results[0].title
- '<b>Python</b> Language Website'
-
-@newfield contrib: Contributors
-@author: Mark Pilgrim <f8dy@diveintomark.org>
-@author: Brian Landers <brian@bluecoat93.org>
-@license: Python
-@version: 0.6
-@contrib: David Ascher, for the install script
-@contrib: Erik Max Francis, for the command line interface
-@contrib: Michael Twomey, for HTTP proxy support
-@contrib: Mark Recht, for patches to support SOAPpy
-"""
-
-__author__ = "Mark Pilgrim (f8dy@diveintomark.org)"
-__version__ = "0.6"
-__cvsversion__ = "$Revision: 1.5 $"[11:-2]
-__date__ = "$Date: 2004/02/25 23:46:07 $"[7:-2]
-__copyright__ = "Copyright (c) 2002 Mark Pilgrim"
-__license__ = "Python"
-__credits__ = """David Ascher, for the install script
-Erik Max Francis, for the command line interface
-Michael Twomey, for HTTP proxy support"""
-
-import os, sys, getopt
-import GoogleSOAPFacade
-
-LICENSE_KEY = None
-HTTP_PROXY = None
-
-#
-# Constants
-#
-_url = 'http://api.google.com/search/beta2'
-_namespace = 'urn:GoogleSearch'
-_googlefile1 = ".googlekey"
-_googlefile2 = "googlekey.txt"
-
-_false = GoogleSOAPFacade.false
-_true = GoogleSOAPFacade.true
-
-_licenseLocations = (
- ( lambda key: key,
- 'passed to the function in license_key variable' ),
- ( lambda key: LICENSE_KEY,
- 'module-level LICENSE_KEY variable (call setLicense to set it)' ),
- ( lambda key: os.environ.get( 'GOOGLE_LICENSE_KEY', None ),
- 'an environment variable called GOOGLE_LICENSE_KEY' ),
- ( lambda key: _contentsOf( os.getcwd(), _googlefile1 ),
- '%s in the current directory' % _googlefile1),
- ( lambda key: _contentsOf( os.getcwd(), _googlefile2 ),
- '%s in the current directory' % _googlefile2),
- ( lambda key: _contentsOf( os.environ.get( 'HOME', '' ), _googlefile1 ),
- '%s in your home directory' % _googlefile1),
- ( lambda key: _contentsOf( os.environ.get( 'HOME', '' ), _googlefile2 ),
- '%s in your home directory' % _googlefile2 ),
- ( lambda key: _contentsOf( _getScriptDir(), _googlefile1 ),
- '%s in the google.py directory' % _googlefile1 ),
- ( lambda key: _contentsOf( _getScriptDir(), _googlefile2 ),
- '%s in the google.py directory' % _googlefile2 )
-)
-
-## ----------------------------------------------------------------------
-## Exceptions
-## ----------------------------------------------------------------------
-
-class NoLicenseKey(Exception):
- """
- Thrown when the API is unable to find a valid license key.
- """
- pass
-
-## ----------------------------------------------------------------------
-## administrative functions (non-API)
-## ----------------------------------------------------------------------
-
-def _version():
- """
- Display a formatted version string for the module
- """
- print """PyGoogle %(__version__)s
-%(__copyright__)s
-released %(__date__)s
-
-Thanks to:
-%(__credits__)s""" % globals()
-
-
-def _usage():
- """
- Display usage information for the command-line interface
- """
- program = os.path.basename(sys.argv[0])
- print """Usage: %(program)s [options] [querytype] query
-
-options:
- -k, --key= <license key> Google license key (see important note below)
- -1, -l, --lucky show only first hit
- -m, --meta show meta information
- -r, --reverse show results in reverse order
- -x, --proxy= <url> use HTTP proxy
- -h, --help print this help
- -v, --version print version and copyright information
- -t, --test run test queries
-
-querytype:
- -s, --search= <query> search (default)
- -c, --cache= <url> retrieve cached page
- -p, --spelling= <word> check spelling
-
-IMPORTANT NOTE: all Google functions require a valid license key;
-visit http://www.google.com/apis/ to get one. %(program)s will look in
-these places (in order) and use the first license key it finds:
- * the key specified on the command line""" % vars()
- for get, location in _licenseLocations[2:]:
- print " *", location
-
-## ----------------------------------------------------------------------
-## utility functions (API)
-## ----------------------------------------------------------------------
-
-def setLicense(license_key):
- """
- Set the U{Google APIs <http://www.google.com/api>} license key
-
- @param license_key: The new key to use
- @type license_key: String
- @todo: validate the key?
- """
- global LICENSE_KEY
- LICENSE_KEY = license_key
-
-
-def getLicense(license_key = None):
- """
- Get the U{Google APIs <http://www.google.com/api>} license key
-
- The key can be read from any number of locations. See the module-leve
- documentation for the search order.
-
- @return: the license key
- @rtype: String
- @raise NoLicenseKey: if no valid key could be found
- """
- for get, location in _licenseLocations:
- rc = get(license_key)
- if rc: return rc
- _usage()
- raise NoLicenseKey, 'get a license key at http://www.google.com/apis/'
-
-
-def setProxy(http_proxy):
- """
- Set the HTTP proxy to be used when accessing Google
-
- @param http_proxy: the proxy to use
- @type http_proxy: String
- @todo: validiate the input?
- """
- global HTTP_PROXY
- HTTP_PROXY = http_proxy
-
-
-def getProxy(http_proxy = None):
- """
- Get the HTTP proxy we use for accessing Google
-
- @return: the proxy
- @rtype: String
- """
- return http_proxy or HTTP_PROXY
-
-
-def _contentsOf(dirname, filename):
- filename = os.path.join(dirname, filename)
- if not os.path.exists(filename): return None
- fsock = open(filename)
- contents = fsock.read()
- fsock.close()
- return contents
-
-
-def _getScriptDir():
- if __name__ == '__main__':
- return os.path.abspath(os.path.dirname(sys.argv[0]))
- else:
- return os.path.abspath(os.path.dirname(sys.modules[__name__].__file__))
-
-
-def _marshalBoolean(value):
- if value:
- return _true
- else:
- return _false
-
-
-def _getRemoteServer( http_proxy ):
- return GoogleSOAPFacade.getProxy( _url, _namespace, http_proxy )
-
-
-## ----------------------------------------------------------------------
-## search results classes
-## ----------------------------------------------------------------------
-
-class _SearchBase:
- def __init__(self, params):
- for k, v in params.items():
- if isinstance(v, GoogleSOAPFacade.structType):
- v = GoogleSOAPFacade.toDict( v )
-
- try:
- if isinstance(v[0], GoogleSOAPFacade.structType):
- v = [ SOAPProxy.toDict( node ) for node in v ]
-
- except:
- pass
- self.__dict__[str(k)] = v
-
-## ----------------------------------------------------------------------
-
-class SearchResultsMetaData(_SearchBase):
- """
- Container class for metadata about a given search query's results.
-
- @ivar documentFiltering: is duplicate page filtering active?
-
- @ivar searchComments: human-readable informational message
-
- example::
-
- "'the' is a very common word and was not included in your search"
-
- @ivar estimatedTotalResultsCount: estimated total number of results
- for this query.
-
- @ivar estimateIsExact: is estimatedTotalResultsCount an exact value?
-
- @ivar searchQuery: search string that initiated this search
-
- @ivar startIndex: index of the first result returned (zero-based)
-
- @ivar endIndex: index of the last result returned (zero-based)
-
- @ivar searchTips: human-readable informational message on how to better
- use Google.
-
- @ivar directoryCategories: list of categories for the search results
-
- This field is a list of dictionaries, like so::
-
- { 'fullViewableName': 'the Open Directory category',
- 'specialEncoding': 'encoding scheme of this directory category'
- }
-
- @ivar searchTime: total search time, in seconds
- """
- pass
-
-## ----------------------------------------------------------------------
-
-class SearchResult(_SearchBase):
- """
- Encapsulates the results from a search.
-
- @ivar URL: URL
-
- @ivar title: title (HTML)
-
- @ivar snippet: snippet showing query context (HTML
-
- @ivar cachedSize: size of cached version of this result, (KB)
-
- @ivar relatedInformationPresent: is the "related:" keyword supported?
-
- Flag indicates that the "related:" keyword is supported for this URL
-
- @ivar hostName: used when filtering occurs
-
- When filtering occurs, a maximum of two results from any given
- host is returned. When this occurs, the second resultElement
- that comes from that host contains the host name in this parameter.
-
- @ivar directoryCategory: Open Directory category information
-
- This field is a dictionary with the following values::
-
- { 'fullViewableName': 'the Open Directory category',
- 'specialEncoding' : 'encoding scheme of this directory category'
- }
-
- @ivar directoryTitle: Open Directory title of this result (or blank)
-
- @ivar summary: Open Directory summary for this result (or blank)
- """
- pass
-
-## ----------------------------------------------------------------------
-
-class SearchReturnValue:
- """
- complete search results for a single query
-
- @ivar meta: L{SearchResultsMetaData} instance for this query
-
- @ivar results: list of L{SearchResult} objects for this query
- """
- def __init__( self, metadata, results ):
- self.meta = metadata
- self.results = results
-
-## ----------------------------------------------------------------------
-## main functions
-## ----------------------------------------------------------------------
-
-def doGoogleSearch( q, start = 0, maxResults = 10, filter = 1,
- restrict='', safeSearch = 0, language = '',
- inputencoding = '', outputencoding = '',\
- license_key = None, http_proxy = None ):
- """
- Search Google using the SOAP API and return the results.
-
- You need a license key to call this function; see the
- U{Google APIs <http://www.google.com/apis/>} site to get one.
- Then you can either pass it to this function every time, or
- set it globally; see the L{google} module-level docs for details.
-
- See U{http://www.google.com/help/features.html}
- for examples of advanced features. Anything that works at the
- Google web site will work as a query string in this method.
-
- You can use the C{start} and C{maxResults} parameters to page
- through multiple pages of results. Note that 'maxResults' is
- currently limited by Google to 10.
-
- See the API reference for more advanced examples and a full list of
- country codes and topics for use in the C{restrict} parameter, along
- with legal values for the C{language}, C{inputencoding}, and
- C{outputencoding} parameters.
-
- You can download the API documentation
- U{http://www.google.com/apis/download.html <here>}.
-
- @param q: search string.
- @type q: String
-
- @param start: (optional) zero-based index of first desired result.
- @type start: int
-
- @param maxResults: (optional) maximum number of results to return.
- @type maxResults: int
-
- @param filter: (optional) flag to request filtering of similar results
- @type filter: int
-
- @param restrict: (optional) restrict results by country or topic.
- @type restrict: String
-
- @param safeSearch: (optional)
- @type safeSearch: int
-
- @param language: (optional)
- @type language: String
-
- @param inputencoding: (optional)
- @type inputencoding: String
-
- @param outputencoding: (optional)
- @type outputencoding: String
-
- @param license_key: (optional) the Google API license key to use
- @type license_key: String
-
- @param http_proxy: (optional) the HTTP proxy to use for talking to Google
- @type http_proxy: String
-
- @return: the search results encapsulated in an object
- @rtype: L{SearchReturnValue}
- """
- license_key = getLicense( license_key )
- http_proxy = getProxy( http_proxy )
- remoteserver = _getRemoteServer( http_proxy )
-
- filter = _marshalBoolean( filter )
- safeSearch = _marshalBoolean( safeSearch )
-
- data = remoteserver.doGoogleSearch( license_key, q, start, maxResults,
- filter, restrict, safeSearch,
- language, inputencoding,
- outputencoding )
-
- metadata = GoogleSOAPFacade.toDict( data )
- del metadata["resultElements"]
-
- metadata = SearchResultsMetaData( metadata )
-
- results = [ SearchResult( GoogleSOAPFacade.toDict( node ) ) \
- for node in data.resultElements ]
-
- return SearchReturnValue( metadata, results )
-
-## ----------------------------------------------------------------------
-
-def doGetCachedPage( url, license_key = None, http_proxy = None ):
- """
- Retrieve a page from the Google cache.
-
- You need a license key to call this function; see the
- U{Google APIs <http://www.google.com/apis/>} site to get one.
- Then you can either pass it to this function every time, or
- set it globally; see the L{google} module-level docs for details.
-
- @param url: full URL to the page to retrieve
- @type url: String
-
- @param license_key: (optional) the Google API key to use
- @type license_key: String
-
- @param http_proxy: (optional) the HTTP proxy server to use
- @type http_proxy: String
-
- @return: full text of the cached page
- @rtype: String
- """
- license_key = getLicense( license_key )
- http_proxy = getProxy( http_proxy )
- remoteserver = _getRemoteServer( http_proxy )
-
- return remoteserver.doGetCachedPage( license_key, url )
-
-## ----------------------------------------------------------------------
-
-def doSpellingSuggestion( phrase, license_key = None, http_proxy = None ):
- """
- Get spelling suggestions from Google
-
- You need a license key to call this function; see the
- U{Google APIs <http://www.google.com/apis/>} site to get one.
- Then you can either pass it to this function every time, or
- set it globally; see the L{google} module-level docs for details.
-
- @param phrase: word or phrase to spell-check
- @type phrase: String
-
- @param license_key: (optional) the Google API key to use
- @type license_key: String
-
- @param http_proxy: (optional) the HTTP proxy to use
- @type http_proxy: String
-
- @return: text of any suggested replacement, or None
- """
- license_key = getLicense( license_key )
- http_proxy = getProxy( http_proxy)
- remoteserver = _getRemoteServer( http_proxy )
-
- return remoteserver.doSpellingSuggestion( license_key, phrase )
-
-## ----------------------------------------------------------------------
-## functional test suite (see googletest.py for unit test suite)
-## ----------------------------------------------------------------------
-
-def _test():
- """
- Run functional test suite.
- """
- try:
- getLicense(None)
- except NoLicenseKey:
- return
-
- print "Searching for Python at google.com..."
- data = doGoogleSearch( "Python" )
- _output( data, { "func": "doGoogleSearch"} )
-
- print "\nSearching for 5 _French_ pages about Python, "
- print "encoded in ISO-8859-1..."
-
- data = doGoogleSearch( "Python", language = 'lang_fr',
- outputencoding = 'ISO-8859-1',
- maxResults = 5 )
-
- _output( data, { "func": "doGoogleSearch" } )
-
- phrase = "Pyhton programming languager"
- print "\nTesting spelling suggestions for '%s'..." % phrase
-
- data = doSpellingSuggestion( phrase )
-
- _output( data, { "func": "doSpellingSuggestion" } )
-
-## ----------------------------------------------------------------------
-## Command-line interface
-## ----------------------------------------------------------------------
-
-class _OutputFormatter:
- def boil(self, data):
- if type(data) == type(u""):
- return data.encode("ISO-8859-1", "replace")
- else:
- return data
-
-class _TextOutputFormatter(_OutputFormatter):
- def common(self, data, params):
- if params.get("showMeta", 0):
- meta = data.meta
- for category in meta.directoryCategories:
- print "directoryCategory: %s" % \
- self.boil(category["fullViewableName"])
- for attr in [node for node in dir(meta) if \
- node <> "directoryCategories" and node[:2] <> '__']:
- print "%s:" % attr, self.boil(getattr(meta, attr))
-
- def doGoogleSearch(self, data, params):
- results = data.results
- if params.get("feelingLucky", 0):
- results = results[:1]
- if params.get("reverseOrder", 0):
- results.reverse()
- for result in results:
- for attr in dir(result):
- if attr == "directoryCategory":
- print "directoryCategory:", \
- self.boil(result.directoryCategory["fullViewableName"])
- elif attr[:2] <> '__':
- print "%s:" % attr, self.boil(getattr(result, attr))
- print
- self.common(data, params)
-
- def doGetCachedPage(self, data, params):
- print data
- self.common(data, params)
-
- doSpellingSuggestion = doGetCachedPage
-
-def _makeFormatter(outputFormat):
- classname = "_%sOutputFormatter" % outputFormat.capitalize()
- return globals()[classname]()
-
-def _output(results, params):
- formatter = _makeFormatter(params.get("outputFormat", "text"))
- outputmethod = getattr(formatter, params["func"])
- outputmethod(results, params)
-
-def main(argv):
- """
- Command-line interface.
- """
- if not argv:
- _usage()
- return
- q = None
- func = None
- http_proxy = None
- license_key = None
- feelingLucky = 0
- showMeta = 0
- reverseOrder = 0
- runTest = 0
- outputFormat = "text"
- try:
- opts, args = getopt.getopt(argv, "s:c:p:k:lmrx:hvt1",
- ["search=", "cache=", "spelling=", "key=", "lucky", "meta",
- "reverse", "proxy=", "help", "version", "test"])
- except getopt.GetoptError:
- _usage()
- sys.exit(2)
- for opt, arg in opts:
- if opt in ("-s", "--search"):
- q = arg
- func = "doGoogleSearch"
- elif opt in ("-c", "--cache"):
- q = arg
- func = "doGetCachedPage"
- elif opt in ("-p", "--spelling"):
- q = arg
- func = "doSpellingSuggestion"
- elif opt in ("-k", "--key"):
- license_key = arg
- elif opt in ("-l", "-1", "--lucky"):
- feelingLucky = 1
- elif opt in ("-m", "--meta"):
- showMeta = 1
- elif opt in ("-r", "--reverse"):
- reverseOrder = 1
- elif opt in ("-x", "--proxy"):
- http_proxy = arg
- elif opt in ("-h", "--help"):
- _usage()
- elif opt in ("-v", "--version"):
- _version()
- elif opt in ("-t", "--test"):
- runTest = 1
- if runTest:
- setLicense(license_key)
- setProxy(http_proxy)
- _test()
- if args and not q:
- q = args[0]
- func = "doGoogleSearch"
- if func:
- results = globals()[func]( q, http_proxy=http_proxy,
- license_key=license_key )
- _output(results, locals())
-
-if __name__ == '__main__':
- main(sys.argv[1:])