Example: Using a paged query to retrieve a large number of records
The Python example below implements a paged query that uses multiple requests to retrieve a large number of operation records in batches of 50 per request. This query retrieves records for all create operations on objects in the /customers/widgetco/orders directory in the default namespace and returns basic information for each record.
The query uses a JSON request body and requests results in JSON format.
#!/usr/bin/env python
# encoding: utf-8
"""Paged operation-based query example.

Issues repeated POST requests against a metadata query interface, asking
for a fixed number of operation records per request and using the last
record of each response as the starting point of the next request, until
the server reports the query as COMPLETE.
"""

import copy
import io
import json
import time

import pycurl


class OperationBasedQueryTool(object):
    """Builds and issues paged operation-based query requests over pycurl."""

    # Template for the JSON request body. Each instance works on its own
    # deep copy (see __init__) so that instances never share query state.
    queryArguments = {'operation': {'count': 1, 'verbose': 'false',
                                    'objectProperties': 'utf8Name, type, size',
                                    'systemMetadata': {
                                        'changeTime': {},
                                        'directories': {'directory': []},
                                        'namespaces': {'namespace': []},
                                        'transactions': {'transaction': []}}}}

    def __init__(self):
        # True once a response with status code "COMPLETE" has been received.
        self.complete = False
        # Created by setConnectionInfo(); released by closeConnection().
        self.curl = None
        # Shadow the class-level template with a private copy; the setters
        # below mutate nested dictionaries in place.
        self.queryArguments = copy.deepcopy(self.queryArguments)

    def setConnectionInfo(self, authToken, hostName, urlName):
        """
        Set all connection info for subsequent query requests.

        @param authToken: authorization token
        @param hostName: Hostname of the target cluster
        @param urlName: Full URL for the query interface
        """
        self.curl = pycurl.Curl()
        requestHeaders = ["Authorization: HCP %s" % authToken,
                          "Accept:application/json",
                          "Content-Type: application/json",
                          "Host: admin.%s" % (hostName)]
        # With FAILONERROR set, perform() raises pycurl.error for HTTP
        # status codes of 400 and above instead of returning the error body.
        self.curl.setopt(pycurl.FAILONERROR, 1)
        self.curl.setopt(pycurl.HTTPHEADER, requestHeaders)
        self.curl.setopt(pycurl.URL, urlName)
        self.curl.setopt(pycurl.CUSTOMREQUEST, 'POST')
        # NOTE(review): certificate and host-name verification are switched
        # off, as in the original example. Enable both when the target
        # system presents a certificate the client can validate.
        self.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        self.curl.setopt(pycurl.SSL_VERIFYHOST, 0)
        self.curl.setopt(pycurl.VERBOSE, 0)

    def setQueryParameters(self, count, verbose, directories, namespaces,
                           transactions,
                           objectProperties='utf8Name, type, size',
                           startTimeMillis=0, endTimeMillis=None):
        """
        Set all parameters related to the query.

        @param count: The number of results to return for each query.
        @param verbose: Indication to return all object property values.
                        Value is either true or false.
        @param directories: Dictionary containing list of directory paths.
        @param namespaces: Dictionary containing list of namespaces.
        @param transactions: Dictionary containing list of transaction
                             types.
        @param objectProperties: String containing comma-separated list of
                                 object properties to return for each
                                 operation record. Default is
                                 'utf8Name, type, size'.
        @param startTimeMillis: The starting timestamp in milliseconds of
                                the query window. Default is 0 (zero).
        @param endTimeMillis: The ending timestamp in milliseconds of the
                              query window. Default (None) is the current
                              time at the moment this method is called.
        """
        if endTimeMillis is None:
            # Resolved per call; a default expression in the signature
            # would be frozen at the time the class is defined.
            endTimeMillis = int(round(time.time() * 1000))

        operation = self.queryArguments['operation']
        operation['count'] = count
        operation['objectProperties'] = objectProperties
        operation['verbose'] = verbose

        systemMetadata = operation['systemMetadata']
        systemMetadata['directories'] = directories
        systemMetadata['namespaces'] = namespaces
        systemMetadata['transactions'] = transactions
        systemMetadata['changeTime']['start'] = startTimeMillis
        systemMetadata['changeTime']['end'] = endTimeMillis

    def issueQuery(self):
        """
        Issue an operation-based query request.

        Sets self.complete to True when the response status code is
        "COMPLETE".

        @return: The response body parsed from JSON into a dictionary.
        @raise Exception: If the HTTP response code is not 200.
        """
        requestBody = json.dumps(self.queryArguments)
        self.curl.setopt(pycurl.POSTFIELDS, requestBody)
        # pycurl hands the response body to the write callback as bytes.
        responseBuffer = io.BytesIO()
        self.curl.setopt(pycurl.WRITEFUNCTION, responseBuffer.write)
        print("Performing query with the following arguments: %s"
              % requestBody)
        try:
            self.curl.perform()
            responseCode = self.curl.getinfo(pycurl.RESPONSE_CODE)
            if responseCode != 200:
                raise Exception("Error: Expected result code 200, but "
                                "received %s" % responseCode)
            # Parse the body as JSON. Never eval() data received over the
            # network: it executes arbitrary code and also rejects the JSON
            # literals true, false and null.
            queryResult = json.loads(responseBuffer.getvalue().decode('utf-8'))
        finally:
            responseBuffer.close()

        if queryResult['queryResult']['status']['code'] == "COMPLETE":
            self.complete = True
        return queryResult

    def setLastResult(self, lastResult):
        """
        Sets the last result we received as the starting point for the
        next query we issue.

        @param lastResult: The dictionary containing the last result
                           returned by the previous query.
        """
        self.queryArguments['operation']['lastResult'] = {
            'urlName': lastResult['urlName'],
            'changeTimeMilliseconds': lastResult['changeTimeMilliseconds'],
            # The version is always sent as a string.
            'version': str(lastResult['version']),
        }

    def closeConnection(self):
        """
        Cleanup the curl connection after we are finished with it.
        Does nothing if no connection was ever opened.
        """
        if self.curl is not None:
            self.curl.close()
            self.curl = None


def main():
    """
    Collect the URLs of all objects created in /customers/widgetco/orders
    in the Default.Default namespace, 50 records per request, and print
    the total number found.
    """
    authToken = "bXl1c2Vy:3f3c6784e97531774380db177774ac8d"
    hostName = "clusterName.com"
    urlName = "https://admin.%s/query" % hostName
    resultsPerQuery = 50
    objectUrls = []

    queryTool = OperationBasedQueryTool()
    queryTool.setConnectionInfo(authToken, hostName, urlName)
    queryTool.setQueryParameters(resultsPerQuery, "false",
                                 {'directory': ['/customers/widgetco/orders']},
                                 {'namespace': ['Default.Default']},
                                 {'transaction': ['create']},
                                 'utf8Name, type, size')
    try:
        while not queryTool.complete:
            queryResults = queryTool.issueQuery()
            resultSet = queryResults['queryResult']['resultSet']
            if not resultSet:
                # Nothing to page from: stop here rather than indexing an
                # empty list or re-issuing the identical request forever.
                break
            objectUrls.extend(result['urlName'] for result in resultSet)
            queryTool.setLastResult(resultSet[-1])
        print("Query completed. Total objects found: %d" % len(objectUrls))
    finally:
        queryTool.closeConnection()


if __name__ == '__main__':
    main()