From cc1bc641e7261834a3ac1d02d7f624735d9daa7e Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Mon, 28 Apr 2025 16:11:47 -0600 Subject: [PATCH 01/21] [#15] start of new python client docs --- docs/Makefile | 20 ++++++++++++++++++ docs/conf.py | 27 +++++++++++++++++++++++++ docs/index.rst | 12 +++++++++++ docs/installation.rst | 29 +++++++++++++++++++++++++++ docs/quickstart.rst | 39 ++++++++++++++++++++++++++++++++++++ docs/spreadsheet_example.rst | 6 ++++++ 6 files changed, 133 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/installation.rst create mode 100644 docs/quickstart.rst create mode 100644 docs/spreadsheet_example.rst diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..951a1d1 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,27 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "lexmachina-client" +copyright = "2025, support@lexmachina.com" +author = "support@lexmachina.com" +release = "2.0.0.20250318" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [] + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "alabaster" +html_static_path = ["_static"] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..4a1c71f --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,12 @@ +lexmachina-client documentation +=============================== + + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + installation + quickstart + spreadsheet_example + diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000..b90deb6 --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,29 @@ +Installation +============ + +Install using pip within a virtual environment: + +.. code-block:: bash + + $ pip install lexmachina-client + + +The examples in the quickstart assume a valid bearer token is set in the environment variable ``BEARER_TOKEN``. To get a bearer token use the API's ``/oauth2/token`` endpoint: + +.. 
code-block:: bash + + $ curl -i -X POST 'https://api.lexmachina.com/oauth2/token' \ + --header 'Content-Type: application/x-www-form-urlencoded' \ + --data-urlencode 'client_id=' \ + --data-urlencode 'client_secret=' \ + --data-urlencode 'grant_type=client_credentials' + + +Then set that bearer token in the environment variable: + +.. code-block:: bash + + $ export BEARER_TOKEN= + + + diff --git a/docs/quickstart.rst b/docs/quickstart.rst new file mode 100644 index 0000000..d5333e9 --- /dev/null +++ b/docs/quickstart.rst @@ -0,0 +1,39 @@ +Quickstart +========== + +After following the installation instructions and setting your bearer token in the environment variable ``BEARER_TOKEN``, you are ready to start! + +For this example, we'll search for a case and get the case details. + +First, we'll configure the client: + +.. code-block:: python + + import lexmachina + import os + + configuration = lexmachina.Configuration( + host="https://api.lexmachina.com", access_token=os.environ["BEARER_TOKEN"] + ) + api_client = lexmachina.ApiClient(configuration) + + +Now let's search for a case. For this example, we'll search for case number 9:02-cv-00058-JH, which is Samsung Electronics v. Sandisk Corporation in the U.S. District Court for the Eastern District of Texas. + + +.. code-block:: python + + case_search_results = fed_cases_api.find_district_case_by_number( + case_numbers=["9:02-cv-00058-JH"], court="txed" + ) + case_result = case_search_results[0] + + + + + + + + + + diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst new file mode 100644 index 0000000..75743e6 --- /dev/null +++ b/docs/spreadsheet_example.rst @@ -0,0 +1,6 @@ +Adding search results to a spreadsheet +====================================== + + +In this simple example we will be searching for antitrust cases terminated in 2024, do a little analysis, and add the results to a spreadsheet. 
+ From 29a88772ebdec70db91e2810bd8411d592c7afa1 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Mon, 5 May 2025 11:21:06 -0600 Subject: [PATCH 02/21] [#15] use a context manager for API client in tests --- tests/test_case_search.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/test_case_search.py b/tests/test_case_search.py index 399755f..7f260da 100644 --- a/tests/test_case_search.py +++ b/tests/test_case_search.py @@ -10,22 +10,22 @@ def api_client(): configuration = lexmachina.Configuration( host="https://api.lexmachina.com", access_token=os.environ["BEARER_TOKEN"] ) - api_client = lexmachina.ApiClient(configuration) - yield api_client + with lexmachina.ApiClient(configuration) as api_client: + yield api_client @pytest.fixture(scope="module") -def fed_cases_api(api_client): +def fed_cases_api_instance(api_client): yield lexmachina.FederalDistrictCasesApi(api_client) @pytest.fixture(scope="module") -def state_cases_api(api_client): +def state_cases_api_instance(api_client): yield lexmachina.StateCasesApi(api_client) -def test_fed_case_number_search(fed_cases_api): - case_search_results = fed_cases_api.find_district_case_by_number( +def test_fed_case_number_search(fed_cases_api_instance): + case_search_results = fed_cases_api_instance.find_district_case_by_number( case_numbers=["1:11-cv-11681-NMG"], court="mad" ) case_result = case_search_results[0] @@ -33,7 +33,7 @@ def test_fed_case_number_search(fed_cases_api): assert 2000026401 in [match.district_case_id for match in case_result.matches] -def test_fed_case_query(fed_cases_api): +def test_fed_case_query(fed_cases_api_instance): query = { "caseStatus": "Terminated", "courts": {"include": ["mad"]}, @@ -48,19 +48,19 @@ def test_fed_case_query(fed_cases_api): "pageSize": 5, } fed_case_query = lexmachina.DistrictCaseQuery.from_dict(query) - fed_case_query_result = fed_cases_api.query_district_cases(fed_case_query) + fed_case_query_result = 
fed_cases_api_instance.query_district_cases(fed_case_query) assert 1 < len(fed_case_query_result.cases) < 5 case_ids = [case_ref.district_case_id for case_ref in fed_case_query_result.cases] assert 2000026401 in case_ids -def test_get_fed_case(fed_cases_api): - case_data = fed_cases_api.get_district_case(2000026401) +def test_get_fed_case(fed_cases_api_instance): + case_data = fed_cases_api_instance.get_district_case(2000026401) law_firm_names = [firm.name for firm in case_data.law_firms] assert "Morrison & Foerster" in law_firm_names -def test_state_case_query(state_cases_api): +def test_state_case_query(state_cases_api_instance): query = { "caseStatus": "Terminated", "courts": {"include": ["Court of Chancery"], "state": "DE"}, @@ -75,7 +75,9 @@ def test_state_case_query(state_cases_api): "pageSize": 5, } state_case_query = lexmachina.StateCaseQuery.from_dict(query) - state_case_query_result = state_cases_api.query_state_cases(state_case_query) + state_case_query_result = state_cases_api_instance.query_state_cases( + state_case_query + ) assert 1 <= len(state_case_query_result.cases) < 5 state_case_ids = [ case_ref.state_case_id for case_ref in state_case_query_result.cases @@ -83,7 +85,7 @@ def test_state_case_query(state_cases_api): assert 2034871656 in state_case_ids -def test_get_state_case(state_cases_api): - case_data = state_cases_api.get_state_case(2034871656) +def test_get_state_case(state_cases_api_instance): + case_data = state_cases_api_instance.get_state_case(2034871656) law_firm_names = [firm.name for firm in case_data.law_firms] assert "Skadden, Arps, Slate, Meagher & Flom" in law_firm_names From c906e299d49ab6a5bb2984bd0278be877152cb5a Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Mon, 5 May 2025 12:03:25 -0600 Subject: [PATCH 03/21] [#15] Add custom template so context manager actually does something --- custom_templates/api_client.mustache | 815 +++++++++++++++++++++++++++ 1 file changed, 815 insertions(+) create mode 100644 
custom_templates/api_client.mustache diff --git a/custom_templates/api_client.mustache b/custom_templates/api_client.mustache new file mode 100644 index 0000000..15990d0 --- /dev/null +++ b/custom_templates/api_client.mustache @@ -0,0 +1,815 @@ +# coding: utf-8 + +{{>partial_header}} + +import datetime +from dateutil.parser import parse +from enum import Enum +import decimal +import json +import mimetypes +import os +import re +import tempfile + +from urllib.parse import quote +from typing import Tuple, Optional, List, Dict, Union +from pydantic import SecretStr +{{#tornado}} +import tornado.gen +{{/tornado}} + +from {{packageName}}.configuration import Configuration +from {{packageName}}.api_response import ApiResponse, T as ApiResponseT +import {{modelPackage}} +from {{packageName}} import rest +from {{packageName}}.exceptions import ( + ApiValueError, + ApiException, + BadRequestException, + UnauthorizedException, + ForbiddenException, + NotFoundException, + ServiceException +) + +RequestSerialized = Tuple[str, str, Dict[str, str], Optional[str], List[str]] + +class ApiClient: + """Generic API client for OpenAPI client library builds. + + OpenAPI generic API client. This client handles the client- + server communication, and is invariant across implementations. Specifics of + the methods and models for each application are generated from the OpenAPI + templates. + + :param configuration: .Configuration object for this client + :param header_name: a header to pass when making calls to the API. + :param header_value: a header value to pass when making calls to + the API. + :param cookie: a cookie to include in the header when making calls + to the API + """ + + PRIMITIVE_TYPES = (float, bool, bytes, str, int) + NATIVE_TYPES_MAPPING = { + 'int': int, + 'long': int, # TODO remove as only py3 is supported? 
+ 'float': float, + 'str': str, + 'bool': bool, + 'date': datetime.date, + 'datetime': datetime.datetime, + 'decimal': decimal.Decimal, + 'object': object, + } + _pool = None + + def __init__( + self, + configuration=None, + header_name=None, + header_value=None, + cookie=None + ) -> None: + # use default configuration if none is provided + if configuration is None: + configuration = Configuration.get_default() + self.configuration = configuration + + self.rest_client = rest.RESTClientObject(configuration) + self.default_headers = {} + if header_name is not None: + self.default_headers[header_name] = header_value + self.cookie = cookie + # Set default User-Agent. + self.user_agent = '{{{httpUserAgent}}}{{^httpUserAgent}}OpenAPI-Generator/{{{packageVersion}}}/python{{/httpUserAgent}}' + self.client_side_validation = configuration.client_side_validation + +{{#asyncio}} + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_value, traceback): + await self.close() + + async def close(self): + await self.rest_client.close() +{{/asyncio}} +{{^asyncio}} + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.rest_client.pool_manager.clear() + self.configuration = None +{{/asyncio}} + + @property + def user_agent(self): + """User agent for this API client""" + return self.default_headers['User-Agent'] + + @user_agent.setter + def user_agent(self, value): + self.default_headers['User-Agent'] = value + + def set_default_header(self, header_name, header_value): + self.default_headers[header_name] = header_value + + + _default = None + + @classmethod + def get_default(cls): + """Return new instance of ApiClient. + + This method returns newly created, based on default constructor, + object of ApiClient class or returns a copy of default + ApiClient. + + :return: The ApiClient object. 
+ """ + if cls._default is None: + cls._default = ApiClient() + return cls._default + + @classmethod + def set_default(cls, default): + """Set default instance of ApiClient. + + It stores default ApiClient. + + :param default: object of ApiClient. + """ + cls._default = default + + def param_serialize( + self, + method, + resource_path, + path_params=None, + query_params=None, + header_params=None, + body=None, + post_params=None, + files=None, auth_settings=None, + collection_formats=None, + _host=None, + _request_auth=None + ) -> RequestSerialized: + + """Builds the HTTP request params needed by the request. + :param method: Method to call. + :param resource_path: Path to method endpoint. + :param path_params: Path parameters in the url. + :param query_params: Query parameters in the url. + :param header_params: Header parameters to be + placed in the request header. + :param body: Request body. + :param post_params dict: Request post form parameters, + for `application/x-www-form-urlencoded`, `multipart/form-data`. + :param auth_settings list: Auth Settings names for the request. + :param files dict: key -> filename, value -> filepath, + for `multipart/form-data`. + :param collection_formats: dict of collection formats for path, query, + header, and post parameters. + :param _request_auth: set to override the auth_settings for an a single + request; this effectively ignores the authentication + in the spec for a single request. 
+ :return: tuple of form (path, http_method, query_params, header_params, + body, post_params, files) + """ + + config = self.configuration + + # header parameters + header_params = header_params or {} + header_params.update(self.default_headers) + if self.cookie: + header_params['Cookie'] = self.cookie + if header_params: + header_params = self.sanitize_for_serialization(header_params) + header_params = dict( + self.parameters_to_tuples(header_params,collection_formats) + ) + + # path parameters + if path_params: + path_params = self.sanitize_for_serialization(path_params) + path_params = self.parameters_to_tuples( + path_params, + collection_formats + ) + for k, v in path_params: + # specified safe chars, encode everything + resource_path = resource_path.replace( + '{%s}' % k, + quote(str(v), safe=config.safe_chars_for_path_param) + ) + + # post parameters + if post_params or files: + post_params = post_params if post_params else [] + post_params = self.sanitize_for_serialization(post_params) + post_params = self.parameters_to_tuples( + post_params, + collection_formats + ) + if files: + post_params.extend(self.files_parameters(files)) + + # auth setting + self.update_params_for_auth( + header_params, + query_params, + auth_settings, + resource_path, + method, + body, + request_auth=_request_auth + ) + + # body + if body: + body = self.sanitize_for_serialization(body) + + # request url + if _host is None or self.configuration.ignore_operation_servers: + url = self.configuration.host + resource_path + else: + # use server/host defined in path or operation instead + url = _host + resource_path + + # query parameters + if query_params: + query_params = self.sanitize_for_serialization(query_params) + url_query = self.parameters_to_url_query( + query_params, + collection_formats + ) + url += "?" 
+ url_query + + return method, url, header_params, body, post_params + + + {{#tornado}} + @tornado.gen.coroutine + {{/tornado}} + {{#asyncio}}async {{/asyncio}}def call_api( + self, + method, + url, + header_params=None, + body=None, + post_params=None, + _request_timeout=None + ) -> rest.RESTResponse: + """Makes the HTTP request (synchronous) + :param method: Method to call. + :param url: Path to method endpoint. + :param header_params: Header parameters to be + placed in the request header. + :param body: Request body. + :param post_params dict: Request post form parameters, + for `application/x-www-form-urlencoded`, `multipart/form-data`. + :param _request_timeout: timeout setting for this request. + :return: RESTResponse + """ + + try: + # perform request and return response + response_data = {{#asyncio}}await {{/asyncio}}{{#tornado}}yield {{/tornado}}self.rest_client.request( + method, url, + headers=header_params, + body=body, post_params=post_params, + _request_timeout=_request_timeout + ) + + except ApiException as e: + raise e + + return response_data + + def response_deserialize( + self, + response_data: rest.RESTResponse, + response_types_map: Optional[Dict[str, ApiResponseT]]=None + ) -> ApiResponse[ApiResponseT]: + """Deserializes response into an object. + :param response_data: RESTResponse object to be deserialized. + :param response_types_map: dict of response types. + :return: ApiResponse + """ + + msg = "RESTResponse.read() must be called before passing it to response_deserialize()" + assert response_data.data is not None, msg + + response_type = response_types_map.get(str(response_data.status), None) + if not response_type and isinstance(response_data.status, int) and 100 <= response_data.status <= 599: + # if not found, look for '1XX', '2XX', etc. 
+ response_type = response_types_map.get(str(response_data.status)[0] + "XX", None) + + # deserialize response data + response_text = None + return_data = None + try: + if response_type == "bytearray": + return_data = response_data.data + elif response_type == "file": + return_data = self.__deserialize_file(response_data) + elif response_type is not None: + match = None + content_type = response_data.getheader('content-type') + if content_type is not None: + match = re.search(r"charset=([a-zA-Z\-\d]+)[\s;]?", content_type) + encoding = match.group(1) if match else "utf-8" + response_text = response_data.data.decode(encoding) + return_data = self.deserialize(response_text, response_type, content_type) + finally: + if not 200 <= response_data.status <= 299: + raise ApiException.from_response( + http_resp=response_data, + body=response_text, + data=return_data, + ) + + return ApiResponse( + status_code = response_data.status, + data = return_data, + headers = response_data.getheaders(), + raw_data = response_data.data + ) + + def sanitize_for_serialization(self, obj): + """Builds a JSON POST object. + + If obj is None, return None. + If obj is SecretStr, return obj.get_secret_value() + If obj is str, int, long, float, bool, return directly. + If obj is datetime.datetime, datetime.date + convert to string in iso8601 format. + If obj is decimal.Decimal return string representation. + If obj is list, sanitize each element in the list. + If obj is dict, return the dict. + If obj is OpenAPI model, return the properties dict. + + :param obj: The data to serialize. + :return: The serialized form of data. 
+ """ + if obj is None: + return None + elif isinstance(obj, Enum): + return obj.value + elif isinstance(obj, SecretStr): + return obj.get_secret_value() + elif isinstance(obj, self.PRIMITIVE_TYPES): + return obj + elif isinstance(obj, list): + return [ + self.sanitize_for_serialization(sub_obj) for sub_obj in obj + ] + elif isinstance(obj, tuple): + return tuple( + self.sanitize_for_serialization(sub_obj) for sub_obj in obj + ) + elif isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + elif isinstance(obj, decimal.Decimal): + return str(obj) + + elif isinstance(obj, dict): + obj_dict = obj + else: + # Convert model obj to dict except + # attributes `openapi_types`, `attribute_map` + # and attributes which value is not None. + # Convert attribute name to json key in + # model definition for request. + if hasattr(obj, 'to_dict') and callable(getattr(obj, 'to_dict')): + obj_dict = obj.to_dict() + else: + obj_dict = obj.__dict__ + + return { + key: self.sanitize_for_serialization(val) + for key, val in obj_dict.items() + } + + def deserialize(self, response_text: str, response_type: str, content_type: Optional[str]): + """Deserializes response into an object. + + :param response: RESTResponse object to be deserialized. + :param response_type: class literal for + deserialized object, or string of class name. + :param content_type: content type of response. + + :return: deserialized object. 
+ """ + + # fetch data from response object + if content_type is None: + try: + data = json.loads(response_text) + except ValueError: + data = response_text + elif re.match(r'^application/(json|[\w!#$&.+-^_]+\+json)\s*(;|$)', content_type, re.IGNORECASE): + if response_text == "": + data = "" + else: + data = json.loads(response_text) + elif re.match(r'^text\/[a-z.+-]+\s*(;|$)', content_type, re.IGNORECASE): + data = response_text + else: + raise ApiException( + status=0, + reason="Unsupported content type: {0}".format(content_type) + ) + + return self.__deserialize(data, response_type) + + def __deserialize(self, data, klass): + """Deserializes dict, list, str into an object. + + :param data: dict, list or str. + :param klass: class literal, or string of class name. + + :return: object. + """ + if data is None: + return None + + if isinstance(klass, str): + if klass.startswith('List['): + m = re.match(r'List\[(.*)]', klass) + assert m is not None, "Malformed List type definition" + sub_kls = m.group(1) + return [self.__deserialize(sub_data, sub_kls) + for sub_data in data] + + if klass.startswith('Dict['): + m = re.match(r'Dict\[([^,]*), (.*)]', klass) + assert m is not None, "Malformed Dict type definition" + sub_kls = m.group(2) + return {k: self.__deserialize(v, sub_kls) + for k, v in data.items()} + + # convert str to class + if klass in self.NATIVE_TYPES_MAPPING: + klass = self.NATIVE_TYPES_MAPPING[klass] + else: + klass = getattr({{modelPackage}}, klass) + + if klass in self.PRIMITIVE_TYPES: + return self.__deserialize_primitive(data, klass) + elif klass == object: + return self.__deserialize_object(data) + elif klass == datetime.date: + return self.__deserialize_date(data) + elif klass == datetime.datetime: + return self.__deserialize_datetime(data) + elif klass == decimal.Decimal: + return decimal.Decimal(data) + elif issubclass(klass, Enum): + return self.__deserialize_enum(data, klass) + else: + return self.__deserialize_model(data, klass) + + def 
parameters_to_tuples(self, params, collection_formats): + """Get parameters as list of tuples, formatting collections. + + :param params: Parameters as dict or list of two-tuples + :param dict collection_formats: Parameter collection formats + :return: Parameters as list of tuples, collections formatted + """ + new_params: List[Tuple[str, str]] = [] + if collection_formats is None: + collection_formats = {} + for k, v in params.items() if isinstance(params, dict) else params: + if k in collection_formats: + collection_format = collection_formats[k] + if collection_format == 'multi': + new_params.extend((k, value) for value in v) + else: + if collection_format == 'ssv': + delimiter = ' ' + elif collection_format == 'tsv': + delimiter = '\t' + elif collection_format == 'pipes': + delimiter = '|' + else: # csv is the default + delimiter = ',' + new_params.append( + (k, delimiter.join(str(value) for value in v))) + else: + new_params.append((k, v)) + return new_params + + def parameters_to_url_query(self, params, collection_formats): + """Get parameters as list of tuples, formatting collections. + + :param params: Parameters as dict or list of two-tuples + :param dict collection_formats: Parameter collection formats + :return: URL query string (e.g. 
a=Hello%20World&b=123) + """ + new_params: List[Tuple[str, str]] = [] + if collection_formats is None: + collection_formats = {} + for k, v in params.items() if isinstance(params, dict) else params: + if isinstance(v, bool): + v = str(v).lower() + if isinstance(v, (int, float)): + v = str(v) + if isinstance(v, dict): + v = json.dumps(v) + + if k in collection_formats: + collection_format = collection_formats[k] + if collection_format == 'multi': + new_params.extend((k, quote(str(value))) for value in v) + else: + if collection_format == 'ssv': + delimiter = ' ' + elif collection_format == 'tsv': + delimiter = '\t' + elif collection_format == 'pipes': + delimiter = '|' + else: # csv is the default + delimiter = ',' + new_params.append( + (k, delimiter.join(quote(str(value)) for value in v)) + ) + else: + new_params.append((k, quote(str(v)))) + + return "&".join(["=".join(map(str, item)) for item in new_params]) + + def files_parameters( + self, + files: Dict[str, Union[str, bytes, List[str], List[bytes], Tuple[str, bytes]]], + ): + """Builds form parameters. + + :param files: File parameters. + :return: Form parameters with files. + """ + params = [] + for k, v in files.items(): + if isinstance(v, str): + with open(v, 'rb') as f: + filename = os.path.basename(f.name) + filedata = f.read() + elif isinstance(v, bytes): + filename = k + filedata = v + elif isinstance(v, tuple): + filename, filedata = v + elif isinstance(v, list): + for file_param in v: + params.extend(self.files_parameters({k: file_param})) + continue + else: + raise ValueError("Unsupported file value") + mimetype = ( + mimetypes.guess_type(filename)[0] + or 'application/octet-stream' + ) + params.append( + tuple([k, tuple([filename, filedata, mimetype])]) + ) + return params + + def select_header_accept(self, accepts: List[str]) -> Optional[str]: + """Returns `Accept` based on an array of accepts provided. + + :param accepts: List of headers. + :return: Accept (e.g. application/json). 
+ """ + if not accepts: + return None + + for accept in accepts: + if re.search('json', accept, re.IGNORECASE): + return accept + + return accepts[0] + + def select_header_content_type(self, content_types): + """Returns `Content-Type` based on an array of content_types provided. + + :param content_types: List of content-types. + :return: Content-Type (e.g. application/json). + """ + if not content_types: + return None + + for content_type in content_types: + if re.search('json', content_type, re.IGNORECASE): + return content_type + + return content_types[0] + + def update_params_for_auth( + self, + headers, + queries, + auth_settings, + resource_path, + method, + body, + request_auth=None + ) -> None: + """Updates header and query params based on authentication setting. + + :param headers: Header parameters dict to be updated. + :param queries: Query parameters tuple list to be updated. + :param auth_settings: Authentication setting identifiers list. + :resource_path: A string representation of the HTTP request resource path. + :method: A string representation of the HTTP request method. + :body: A object representing the body of the HTTP request. + The object type is the return value of sanitize_for_serialization(). + :param request_auth: if set, the provided settings will + override the token in the configuration. + """ + if not auth_settings: + return + + if request_auth: + self._apply_auth_params( + headers, + queries, + resource_path, + method, + body, + request_auth + ) + else: + for auth in auth_settings: + auth_setting = self.configuration.auth_settings().get(auth) + if auth_setting: + self._apply_auth_params( + headers, + queries, + resource_path, + method, + body, + auth_setting + ) + + def _apply_auth_params( + self, + headers, + queries, + resource_path, + method, + body, + auth_setting + ) -> None: + """Updates the request parameters based on a single auth_setting + + :param headers: Header parameters dict to be updated. 
+ :param queries: Query parameters tuple list to be updated. + :resource_path: A string representation of the HTTP request resource path. + :method: A string representation of the HTTP request method. + :body: A object representing the body of the HTTP request. + The object type is the return value of sanitize_for_serialization(). + :param auth_setting: auth settings for the endpoint + """ + if auth_setting['in'] == 'cookie': + headers['Cookie'] = auth_setting['value'] + elif auth_setting['in'] == 'header': + if auth_setting['type'] != 'http-signature': + headers[auth_setting['key']] = auth_setting['value'] + {{#hasHttpSignatureMethods}} + else: + # The HTTP signature scheme requires multiple HTTP headers + # that are calculated dynamically. + signing_info = self.configuration.signing_info + auth_headers = signing_info.get_http_signature_headers( + resource_path, method, headers, body, queries) + headers.update(auth_headers) + {{/hasHttpSignatureMethods}} + elif auth_setting['in'] == 'query': + queries.append((auth_setting['key'], auth_setting['value'])) + else: + raise ApiValueError( + 'Authentication token must be in `query` or `header`' + ) + + def __deserialize_file(self, response): + """Deserializes body to file + + Saves response body into a file in a temporary folder, + using the filename from the `Content-Disposition` header if provided. + + handle file downloading + save response body into a tmp file and return the instance + + :param response: RESTResponse. + :return: file path. 
+ """ + fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path) + os.close(fd) + os.remove(path) + + content_disposition = response.getheader("Content-Disposition") + if content_disposition: + m = re.search( + r'filename=[\'"]?([^\'"\s]+)[\'"]?', + content_disposition + ) + assert m is not None, "Unexpected 'content-disposition' header value" + filename = m.group(1) + path = os.path.join(os.path.dirname(path), filename) + + with open(path, "wb") as f: + f.write(response.data) + + return path + + def __deserialize_primitive(self, data, klass): + """Deserializes string to primitive type. + + :param data: str. + :param klass: class literal. + + :return: int, long, float, str, bool. + """ + try: + return klass(data) + except UnicodeEncodeError: + return str(data) + except TypeError: + return data + + def __deserialize_object(self, value): + """Return an original value. + + :return: object. + """ + return value + + def __deserialize_date(self, string): + """Deserializes string to date. + + :param string: str. + :return: date. + """ + try: + return parse(string).date() + except ImportError: + return string + except ValueError: + raise rest.ApiException( + status=0, + reason="Failed to parse `{0}` as date object".format(string) + ) + + def __deserialize_datetime(self, string): + """Deserializes string to datetime. + + The string should be in iso8601 datetime format. + + :param string: str. + :return: datetime. + """ + try: + return parse(string) + except ImportError: + return string + except ValueError: + raise rest.ApiException( + status=0, + reason=( + "Failed to parse `{0}` as datetime object" + .format(string) + ) + ) + + def __deserialize_enum(self, data, klass): + """Deserializes primitive type to enum. + + :param data: primitive type. + :param klass: class literal. + :return: enum value. 
+ """ + try: + return klass(data) + except ValueError: + raise rest.ApiException( + status=0, + reason=( + "Failed to parse `{0}` as `{1}`" + .format(data, klass) + ) + ) + + def __deserialize_model(self, data, klass): + """Deserializes list or dict to model. + + :param data: dict, list. + :param klass: class literal. + :return: model object. + """ + + return klass.from_dict(data) From 843aa131c796c5168efdf1dad6ff10c9a419771c Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Mon, 5 May 2025 12:19:23 -0600 Subject: [PATCH 04/21] [#15] Update actual client model to do something with it exits --- src/lexmachina/api_client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lexmachina/api_client.py b/src/lexmachina/api_client.py index a8df1e7..a2be914 100644 --- a/src/lexmachina/api_client.py +++ b/src/lexmachina/api_client.py @@ -97,7 +97,8 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): - pass + self.rest_client.pool_manager.clear() + self.configuration = None @property def user_agent(self): From ea58a02f2908d365deaa5a4bcc2f8fe68e5b0642 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Mon, 5 May 2025 14:49:37 -0600 Subject: [PATCH 05/21] [#15] Flesh out quickstart --- docs/quickstart.rst | 82 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 9 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index d5333e9..a682cdd 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -3,9 +3,15 @@ Quickstart After following the installation instructions and setting your bearer token in the environment variable ``BEARER_TOKEN``, you are ready to start! -For this example, we'll search for a case and get the case details. +For this example, we'll search for a case and get its details. -First, we'll configure the client: +To get detailed information on an individual case, we'll need the Lex Machina id for that case. 
One way to get the case's id is to search for it by case number. + +In the first code block, you will see the following steps: + +#. We configure the client. We'll use a context manager to take advantage of the automatic cleanup it does for us (clearing out the connection pool and the configuration). +#. Using the configured client, we create an object with access to the Federal District Case endpoints. +#. We do a case number search. For this example, we search for Samsung Electronics v. Sandisk Corporation, case number 9:02-cv-00058-JH. The case number search ignores judge initials at the end of a case number so they've been left out in the search example below. But they could be left in and the results would be the same. We further refine the search by using the optional court filter to limit our search to cases in the U.S. District Court for the Eastern District of Texas. .. code-block:: python @@ -13,20 +19,78 @@ First, we'll configure the client: import os configuration = lexmachina.Configuration( - host="https://api.lexmachina.com", access_token=os.environ["BEARER_TOKEN"] + host="https://api.lexmachina.com", + access_token=os.environ["BEARER_TOKEN"] ) - api_client = lexmachina.ApiClient(configuration) + with lexmachina.ApiClient(configuration) as api_client: + + fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) + case_search_results = fed_dist_case_api_instance.find_district_case_by_number( + case_numbers=["9:02-cv-00058"], court="txed" + ) -Now let's search for a case. For this example, we'll search for case number 9:02-cv-00058-JH, which is Samsung Electronics v. Sandisk Corporation in the U.S. District Court for the Eastern District of Texas. +When we look at ``case_search_results``, this search conveniently returns just one result (if we had left out the court filter, it would have returned three results): ..
code-block:: python + + [ + DistrictCaseNumberSearchResult( + total_count=1, + input_case_number='9:02-cv-00058', + input_court='txed', + matches=[ + DistrictCaseNumberReference( + url='https://api.lexmachina.com/district-cases/88', + district_case_id=88, + case_number='9:02-cv-00058', + court='U.S. District Court for the Eastern District of Texas', + title='Samsung Electronics v. Sandisk Corporation' + ) + ] + +We can see from the output above the Lex Machina id for the case is 88. We will use that in the endpoint to get data on an individual case: - case_search_results = fed_cases_api.find_district_case_by_number( - case_numbers=["9:02-cv-00058-JH"], court="txed" - ) - case_result = case_search_results[0] + +.. code-block:: python + + with lexmachina.ApiClient(configuration) as api_client: + fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) + apple_v_sandisk_case = fed_dist_case_api_instance.get_district_case(88) + + +Just for example purposes, here is a sampling of data provided for this individual case. You'll see a number of ids that you can then use to get more information on individual judges, law firms, attorneys, and parties. + +.. code-block:: python + + apple_v_sandisk_case.case_type + ['Patent'] + + apple_v_sandisk_case.patents + [Patent(number='5473563', title='Nonvolatile semiconductor memory'), + Patent(number='5514889', title='Non-volatile semiconductor memory device and method for manufacturing the same'), + Patent(number='5546341', title='Nonvolatile semiconductor memory'), + Patent(number='5642309', title='Auto-program circuit in a nonvolatile semiconductor memory device')] + + apple_v_sandisk_case.judges + [FederalJudge(name='John H.
Hannah Jr.', federal_judge_id=969)] + + apple_v_sandisk_case.law_firms + [LawFirm(name='Fish & Richardson', law_firm_id=906, client_party_ids=[123]), + LawFirm(name='McKool Smith', law_firm_id=3425, client_party_ids=[25635]), + LawFirm(name='Weil, Gotshal & Manges', law_firm_id=4521, client_party_ids=[123]), + LawFirm(name='Ramey & Flock', law_firm_id=17879, client_party_ids=[25635]), + LawFirm(name='The Roth Law Firm (rothfirm.com)', law_firm_id=18116, client_party_ids=[111]), + LawFirm(name='Chandler Law Offices (cmzlaw.net)', law_firm_id=19244, client_party_ids=[123]), + LawFirm(name='Law Office of Claude E Welch', law_firm_id=38775, client_party_ids=[123]), + LawFirm(name='Richards & Penn', law_firm_id=7915397, client_party_ids=[123]), + LawFirm(name='Wilson Sonsini Goodrich & Rosati', law_firm_id=75246884, client_party_ids=[25635])] + + +This example uses the case number search endpoint to find the case id, but there are other ways to find it, such as the Federal District case query endpoint. + +To know your search options, it helps to be familiar with the user-facing Lex Machina website. We recognize that, for new users, the search options are not always immediately obvious. If you would like any help using the Lex Machina API, please contact support@lexmachina.com. 
From cfa750e2c9aa8e2c84f0573be0d3c160c0f3ccb7 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Mon, 5 May 2025 16:09:32 -0600 Subject: [PATCH 06/21] [#15] tweaks to existing docs, start fleshing out spreadsheet example --- docs/index.rst | 1 + docs/installation.rst | 2 +- docs/quickstart.rst | 1 - docs/spreadsheet_example.rst | 97 +++++++++++++++++++++++++++++++++++- 4 files changed, 98 insertions(+), 3 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 4a1c71f..db0e4a3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,5 +8,6 @@ lexmachina-client documentation installation quickstart spreadsheet_example + :caption: Contents: diff --git a/docs/installation.rst b/docs/installation.rst index b90deb6..cbdc743 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -8,7 +8,7 @@ Install using pip within a virtual environment: $ pip install lexmachina-client -The examples in the quickstart assume a valid bearer token is set in the environment variable ``BEARER_TOKEN``. To get a bearer token use the API's ``/oauth2/token`` endpoint: +The examples in the quickstart assume a valid bearer token is set in the environment variable ``BEARER_TOKEN``. You can get a bearer token by using the ``/oauth2/token`` endpoint: .. 
code-block:: bash diff --git a/docs/quickstart.rst b/docs/quickstart.rst index a682cdd..53c5498 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -24,7 +24,6 @@ In the first code block, you will see the following steps: ) with lexmachina.ApiClient(configuration) as api_client: - fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) case_search_results = fed_dist_case_api_instance.find_district_case_by_number( case_numbers=["9:02-cv-00058"], court="txed" diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index 75743e6..6f5d659 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -2,5 +2,100 @@ Adding search results to a spreadsheet ====================================== -In this simple example we will be searching for antitrust cases terminated in 2024, do a little analysis, and add the results to a spreadsheet. +In the quickstart, we looked for an individual case. In this example, we'll look at a group of cases to see if we can glean any interest information. + + +For this example, we'll look for Antitrust cases terminated in 2024, do some light analysis, and then add the cases to a spreadsheet. + + +In the quickstart, you saw how we created an API client object using a context manager and then an object with access to the Federal District case endpoints: + +.. code-block:: python + + import lexmachina + import os + + configuration = lexmachina.Configuration( + host="https://api.lexmachina.com", + access_token=os.environ["BEARER_TOKEN"] + ) + + with lexmachina.ApiClient(configuration) as api_client: + fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) + + +To simplify the code blocks used in this example, we'll simply refer to the ``fed_dist_case_api_instance`` created in the context manager. + + +First, we'll create a query for Antitrust cases terminated in 2024. The API returns results in "pages" with each page showing a maximum of 100 results. 
If your search returns more than 100 results, you'll need to page through them. + +.. code-block:: python + + query = { + "caseTypes": { + "include": [ + "Antitrust" + ] + }, + "dates": { + "terminated": { + "onOrAfter": "2024-01-01", + "onOrBefore": "2024-12-31" + } + }, + "page": 1, + "pageSize": 100 + } + + +We then page through the results to get the resulting case ids. Some queries return a lot of cases, so we added a print to assure ourselves something is happening while we wait. + + +.. code-block:: python + + case_ids = [] + done_paging = False + + while not done_paging: + query_response = fed_dist_case_api_instance.query_district_cases(query) + + if query_response.cases: + current_page = query['page'] + print(f'{current_page=}') + result_case_ids = [caseref.district_case_id for caseref in query_response.cases] + case_ids += result_case_ids + query['page'] = current_page + 1 + + else: + print(f'Antitrust cases terminated in 2024 has length {len(case_ids)}') + done_paging=True + + +We can then get case data for each of case ids. While we could use list comprehension for this, for this example we'll use a loop so we can add prints to assure ourselves something is happening. + + +.. code-block:: python + + case_data = [] + + for case_id in case_ids: + case_data.append(fed_dist_case_api_instance.get_district_case(case_id)) + if len(case_data) % 50 == 0: + print(f'{len(case_data)} out of {len(case_ids)} processed') + + + + + + + + + + + + + + + + From 7014b8376b9c1307a7775d353f6fc3dde2228786 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Mon, 5 May 2025 16:16:50 -0600 Subject: [PATCH 07/21] [#15] Fix toctree structure, fix typos in spreadsheet example --- docs/index.rst | 3 +-- docs/spreadsheet_example.rst | 14 +++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index db0e4a3..bca8909 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,11 +3,10 @@ lexmachina-client documentation .. 
toctree:: + :caption: Contents: :maxdepth: 2 installation quickstart spreadsheet_example - - :caption: Contents: diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index 6f5d659..ce8a4c1 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -2,13 +2,13 @@ Adding search results to a spreadsheet ====================================== -In the quickstart, we looked for an individual case. In this example, we'll look at a group of cases to see if we can glean any interest information. +In the quickstart, we looked for an individual case. In this example, we'll look at a group of cases to see if we can glean any interesting information about the group of cases as a whole. -For this example, we'll look for Antitrust cases terminated in 2024, do some light analysis, and then add the cases to a spreadsheet. +For this example, we'll look for Antitrust cases terminated in 2024, do some light analysis, and then the cases and analysis to a spreadsheet. -In the quickstart, you saw how we created an API client object using a context manager and then an object with access to the Federal District case endpoints: +In the quickstart, you saw how we created an API client object using a context manager. We then used the API client to create an object with access to the Federal District case API endpoints: .. code-block:: python @@ -24,7 +24,7 @@ In the quickstart, you saw how we created an API client object using a context m fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) -To simplify the code blocks used in this example, we'll simply refer to the ``fed_dist_case_api_instance`` created in the context manager. +To simplify the code blocks used in this example, we'll simply refer to the ``fed_dist_case_api_instance`` created in the code block above. First, we'll create a query for Antitrust cases terminated in 2024. The API returns results in "pages" with each page showing a maximum of 100 results. 
If your search returns more than 100 results, you'll need to page through them. @@ -45,10 +45,10 @@ First, we'll create a query for Antitrust cases terminated in 2024. The API retu }, "page": 1, "pageSize": 100 - } + } -We then page through the results to get the resulting case ids. Some queries return a lot of cases, so we added a print to assure ourselves something is happening while we wait. +Below, we iterate over pages and save the resulting case ids to a list. While this query returns under 700 cases, some queries return a lot of cases, so we use a print to assure ourselves something is happening while we wait. .. code-block:: python @@ -71,7 +71,7 @@ We then page through the results to get the resulting case ids. Some queries ret done_paging=True -We can then get case data for each of case ids. While we could use list comprehension for this, for this example we'll use a loop so we can add prints to assure ourselves something is happening. +Armed with case ids, we can then get case data for each of those cases. While we could use more efficient list comprehension, for this example we'll use a loop so we can add prints and assure ourselves something is happening while we wait. .. 
code-block:: python From 4f223f369974ebb7733021e87784d8fce72d48ff Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Tue, 6 May 2025 09:53:39 -0600 Subject: [PATCH 08/21] [#15] Make references to other docs links --- docs/installation.rst | 2 +- docs/quickstart.rst | 5 +++-- docs/spreadsheet_example.rst | 16 ++-------------- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index cbdc743..a097882 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -26,4 +26,4 @@ Then set that bearer token in the environment variable: $ export BEARER_TOKEN= - +Next: :doc:`quickstart` diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 53c5498..cbb3340 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -1,7 +1,7 @@ Quickstart ========== -After following the installation instructions and setting your bearer token in the environment variable ``BEARER_TOKEN``, you are ready to start! +After following the :doc:`installation` instructions and setting your bearer token in the environment variable ``BEARER_TOKEN``, you are ready to start! For this example, we'll search for a case and get its details. @@ -92,7 +92,8 @@ This example uses the case number search endpoint to find the case id, but there To know your search options, it helps to be familiar with the user-facing Lex Machina website. We recognize that, for new users, the search options are not always immediately obvious. If you would like any help using the Lex Machina API, please contact support@lexmachina.com. - +Next: :doc:`spreadsheet_example` +Previous: :doc:`installation` diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index ce8a4c1..141a830 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -2,7 +2,7 @@ Adding search results to a spreadsheet ====================================== -In the quickstart, we looked for an individual case. 
In this example, we'll look at a group of cases to see if we can glean any interesting information about the group of cases as a whole. +In the :doc:`quickstart `, we looked for an individual case. In this example, we'll look at a group of cases to see if we can glean any interesting information about the group of cases as a whole. For this example, we'll look for Antitrust cases terminated in 2024, do some light analysis, and then the cases and analysis to a spreadsheet. @@ -86,16 +86,4 @@ Armed with case ids, we can then get case data for each of those cases. While we - - - - - - - - - - - - - +Previous: :doc:`quickstart` From 193942451f40a4465b81f8962bdfea754408267a Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Tue, 6 May 2025 11:26:40 -0600 Subject: [PATCH 09/21] [#15] add notebook with spreadsheet example --- examples/spreadsheet_example.ipynb | 124 +++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 examples/spreadsheet_example.ipynb diff --git a/examples/spreadsheet_example.ipynb b/examples/spreadsheet_example.ipynb new file mode 100644 index 0000000..82f6dc8 --- /dev/null +++ b/examples/spreadsheet_example.ipynb @@ -0,0 +1,124 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ae993c1c-a333-4de3-b2a9-842b31f3a386", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import lexmachina" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "28656d9f-a5d0-4d32-8aa0-a08ff294b668", + "metadata": {}, + "outputs": [], + "source": [ + "configuration = lexmachina.Configuration(\n", + " host=\"https://api.lexmachina.com\",\n", + " access_token=os.environ[\"BEARER_TOKEN\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a2059042-88b3-494a-8f2a-a727b41f29b0", + "metadata": {}, + "outputs": [], + "source": [ + "query = {\n", + " \"caseTypes\": {\n", + " \"include\": [\n", + " \"Antitrust\"\n", + " ]\n", + " },\n", + " \"dates\": {\n", + " 
\"terminated\": {\n", + " \"onOrAfter\": \"2024-01-01\",\n", + " \"onOrBefore\": \"2024-12-31\"\n", + " }\n", + " },\n", + " \"page\": 1,\n", + " \"pageSize\": 100\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "18d2c87d-7224-47c1-a503-d4f81d09c8e9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "current_page=1\n", + "current_page=2\n", + "current_page=3\n", + "current_page=4\n", + "current_page=5\n", + "current_page=6\n", + "current_page=7\n", + "Antitrust cases terminated in 2024 has length 671\n" + ] + } + ], + "source": [ + "with lexmachina.ApiClient(configuration) as api_client:\n", + " fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client)\n", + "\n", + " case_ids = []\n", + " done_paging = False\n", + "\n", + " while not done_paging:\n", + " query_response = fed_dist_case_api_instance.query_district_cases(query)\n", + " \n", + " if query_response.cases:\n", + " current_page = query['page']\n", + " print(f'{current_page=}')\n", + " result_case_ids = [caseref.district_case_id for caseref in query_response.cases]\n", + " case_ids += result_case_ids\n", + " query['page'] = current_page + 1\n", + " \n", + " else:\n", + " print(f'Antitrust cases terminated in 2024 has length {len(case_ids)}')\n", + " done_paging=True " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b069531f-0c0b-440a-bf9a-18d5bd02bf01", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 29cc2b63a9291b23bceb68f44dcaf04229ed6c13 Mon Sep 17 
00:00:00 2001 From: Frances Wong Date: Tue, 6 May 2025 15:32:12 -0600 Subject: [PATCH 10/21] [#15] Flesh out spreadsheet example --- docs/spreadsheet_example.rst | 108 ++++- examples/spreadsheet_example.ipynb | 649 ++++++++++++++++++++++++++++- 2 files changed, 748 insertions(+), 9 deletions(-) diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index 141a830..ee114c7 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -48,7 +48,7 @@ First, we'll create a query for Antitrust cases terminated in 2024. The API retu } -Below, we iterate over pages and save the resulting case ids to a list. While this query returns under 700 cases, some queries return a lot of cases, so we use a print to assure ourselves something is happening while we wait. +Below, we iterate over pages and save the resulting case ids to a list. While this query returns under 700 cases, some queries return a lot of cases, so we use a print to provide progress updates. .. code-block:: python @@ -71,7 +71,7 @@ Below, we iterate over pages and save the resulting case ids to a list. While th done_paging=True -Armed with case ids, we can then get case data for each of those cases. While we could use more efficient list comprehension, for this example we'll use a loop so we can add prints and assure ourselves something is happening while we wait. +Armed with case ids, we can then get case data for each of those cases. While we could use more efficient list comprehension, for this example we'll use a loop so we can use a print to get updates on progress. .. code-block:: python @@ -84,6 +84,110 @@ Armed with case ids, we can then get case data for each of those cases. While we print(f'{len(case_data)} out of {len(case_ids)} processed') +We can now do some analysis. First we'll check which judges saw the most of these cases. +We'll also get info on how long these cases lasted. + +.. 
code-block:: python + + from collections import defaultdict + + cases_by_judge = defaultdict(list) + + for c in case_data: + for j in c.judges: + cases_by_judge[(j.name, j.federal_judge_id)].append( + dict(case_id=c.district_case_id, duration=c.dates.terminated - c.dates.filed) + ) + + +The above shows that 378 judges saw these 671 cases. + +Next, we'll get some timing info: + + +.. code-block:: python + + all_durations = [] + + for case_group in cases_by_judge.values(): + all_durations += [c['duration'].days for c in case_group] + + +If we import the ``statistics`` library, we can check out the mean and median values: + +.. code-block:: python + + round(statistics.mean(all_durations)) + 1084 + + statistics.median(all_durations) + 451 + + +Next, let's check how long these durations were for the judges who saw the most cases. + +First let's sort judges by case counts: + +.. code-block:: python + + case_count_by_judges = [ + (judge_info, len(cases_by_judge[judge_info])) + for judge_info in cases_by_judge + ] + + sorted_case_counts_by_judges = sorted( + case_count_by_judges, key=lambda x: x[-1], reverse=True + ) + + +To see the duration stats for the top five judges: + +.. code-block:: python + + for j in sorted_case_counts_by_judges[:5]: + judge_cases = cases_by_judge[j[0]] + judge_durations = [c['duration'].days for c in judge_cases] + print('--------------------') + print(f'judge name: {j[0][0]}') + print(f'total num cases: {j[1]}') + print(f'average duration: {round(statistics.mean(judge_durations))}') + print(f'median duration: {statistics.median(judge_durations)}') + + + -------------------- + judge name: Edgardo Ramos + total num cases: 37 + average duration: 133 + median duration: 95 + -------------------- + judge name: Waverly David Crenshaw Jr.
+ total num cases: 34 + average duration: 280 + median duration: 307.0 + -------------------- + judge name: Sarah Elizabeth Pitlyk + total num cases: 30 + average duration: 1146 + median duration: 1178.0 + -------------------- + judge name: P. Kevin Castel + total num cases: 23 + average duration: 823 + median duration: 912 + -------------------- + judge name: Sara Elizabeth Lioi + total num cases: 23 + average duration: 65 + median duration: 71 + + + + + + + + + Previous: :doc:`quickstart` diff --git a/examples/spreadsheet_example.ipynb b/examples/spreadsheet_example.ipynb index 82f6dc8..6a8b279 100644 --- a/examples/spreadsheet_example.ipynb +++ b/examples/spreadsheet_example.ipynb @@ -2,13 +2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 78, "id": "ae993c1c-a333-4de3-b2a9-842b31f3a386", "metadata": {}, "outputs": [], "source": [ "import os\n", - "import lexmachina" + "import lexmachina\n", + "from collections import defaultdict\n", + "# pip install openpyxl\n", + "import openpyxl\n", + "import statistics" ] }, { @@ -26,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "id": "a2059042-88b3-494a-8f2a-a727b41f29b0", "metadata": {}, "outputs": [], @@ -50,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "id": "18d2c87d-7224-47c1-a503-d4f81d09c8e9", "metadata": {}, "outputs": [ @@ -65,7 +69,20 @@ "current_page=5\n", "current_page=6\n", "current_page=7\n", - "Antitrust cases terminated in 2024 has length 671\n" + "Antitrust cases terminated in 2024 has length 671\n", + "50 out of 671 processed\n", + "100 out of 671 processed\n", + "150 out of 671 processed\n", + "200 out of 671 processed\n", + "250 out of 671 processed\n", + "300 out of 671 processed\n", + "350 out of 671 processed\n", + "400 out of 671 processed\n", + "450 out of 671 processed\n", + "500 out of 671 processed\n", + "550 out of 671 processed\n", + "600 out of 671 processed\n", + "650 out of 671 
processed\n" ] } ], @@ -88,14 +105,632 @@ " \n", " else:\n", " print(f'Antitrust cases terminated in 2024 has length {len(case_ids)}')\n", - " done_paging=True " + " done_paging=True\n", + "\n", + " case_data = []\n", + "\n", + " for case_id in case_ids:\n", + " case_data.append(fed_dist_case_api_instance.get_district_case(case_id))\n", + " if len(case_data) % 50 == 0:\n", + " print(f'{len(case_data)} out of {len(case_ids)} processed')\n", + "\n", + " " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "b069531f-0c0b-440a-bf9a-18d5bd02bf01", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[FederalJudge(name='Mitchell S. Goldberg', federal_judge_id=3193)]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "case_data[0].judges" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "88204f40-0c2e-4b59-b140-407b6b0bd85e", + "metadata": {}, + "outputs": [], + "source": [ + "cases_by_judge = defaultdict(list)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "92093b5a-acd0-41c7-8b3c-841d94d873f5", + "metadata": {}, + "outputs": [], + "source": [ + "for c in case_data:\n", + " for j in c.judges:\n", + " cases_by_judge[(j.name, j.federal_judge_id)].append(dict(case_id=c.district_case_id, duration=c.dates.terminated - c.dates.filed))\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "6e7f1382-5988-4cb7-ae8b-058f9109fa7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "378" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(cases_by_judge)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "afdb2296-fe46-4f84-8635-449ff78c0a01", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('Mitchell S. 
Goldberg', 3193),\n", + " ('Edmond E-Min Chang', 3342),\n", + " ('Miriam Goldman Cedarbaum', 406),\n", + " ('Lorna Gail Schofield', 3451),\n", + " ('Joel A. Pisano', 2851)]" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(cases_by_judge)[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "ba5310fb-db7d-40e2-b682-54f9d8fe8eaf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cases_by_judge[('Lorna Gail Schofield', 3451)]" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "34c9b660-853d-4b26-92ff-b03849bfa8bd", + "metadata": {}, + "outputs": [], + "source": [ + "case_count_by_judges = [(judge_info, len(cases_by_judge[judge_info])) for judge_info in cases_by_judge]" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "b8c7b5f7-9798-4036-957b-22f40eed1d83", + "metadata": {}, + "outputs": [], + "source": [ + "sorted_case_counts_by_judges = sorted(case_count_by_judges, key=lambda x: x[-1], reverse=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "5a9b2351-d5a0-4cef-b744-7a986428d4e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(('Edgardo Ramos', 3405), 37),\n", + " (('Waverly David Crenshaw Jr.', 3603), 34),\n", + " (('Sarah Elizabeth Pitlyk', 7465646), 30),\n", + " (('P. 
Kevin Castel', 3029), 23),\n", + " (('Sara Elizabeth Lioi', 3140), 23)]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted_case_counts_by_judges[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "514d178c-3110-4564-a006-d6a6dbb4ebc7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(('Steven Douglas Merryday', 1627), 1),\n", + " (('Mary Stenson Scriven', 3189), 1),\n", + " (('John George Koeltl', 1305), 1),\n", + " (('John Charles Hinderaker', 8938396), 1),\n", + " (('Yvonne Gonzalez Rogers', 3404), 1)]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted_case_counts_by_judges[-5:]" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "b5b75e1a-b339-4255-a0e0-cd299b8eb944", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[{'case_id': 97091, 'duration': datetime.timedelta(days=6752)},\n", + " {'case_id': 2000045171, 'duration': datetime.timedelta(days=4015)},\n", + " {'case_id': 2000045229, 'duration': datetime.timedelta(days=4011)},\n", + " {'case_id': 2000046633, 'duration': datetime.timedelta(days=3980)},\n", + " {'case_id': 2000047864, 'duration': datetime.timedelta(days=3952)},\n", + " {'case_id': 2000049181, 'duration': datetime.timedelta(days=3918)},\n", + " {'case_id': 2000049340, 'duration': datetime.timedelta(days=3912)},\n", + " {'case_id': 2000049341, 'duration': datetime.timedelta(days=3912)},\n", + " {'case_id': 2000049655, 'duration': datetime.timedelta(days=3905)},\n", + " {'case_id': 2000049658, 'duration': datetime.timedelta(days=3905)},\n", + " {'case_id': 2000049746, 'duration': datetime.timedelta(days=3903)},\n", + " {'case_id': 2000051026, 'duration': datetime.timedelta(days=3863)}],\n", + " [{'case_id': 48907, 'duration': datetime.timedelta(days=6117)},\n", + " {'case_id': 2007889413, 'duration': 
datetime.timedelta(days=935)},\n", + " {'case_id': 2034461979, 'duration': datetime.timedelta(days=3)}],\n", + " [{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}],\n", + " [{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}],\n", + " [{'case_id': 2000026715, 'duration': datetime.timedelta(days=4739)},\n", + " {'case_id': 2000028620, 'duration': datetime.timedelta(days=4676)},\n", + " {'case_id': 2000029363, 'duration': datetime.timedelta(days=4653)},\n", + " {'case_id': 2000034588, 'duration': datetime.timedelta(days=4500)},\n", + " {'case_id': 2000035078, 'duration': datetime.timedelta(days=4481)}]]" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list( cases_by_judge.values())[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "014909c0-f89c-45a8-b098-a797f4cfc51c", + "metadata": {}, + "outputs": [], + "source": [ + "all_durations = []" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "7a21f080-4aba-445d-a25a-4dc09e483408", + "metadata": {}, + "outputs": [], + "source": [ + "for case_group in cases_by_judge.values():\n", + " all_durations += [c['duration'].days for c in case_group]" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "53db69a3-8671-4982-8837-ffe726baa09e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[6752, 4015, 4011, 3980, 3952]" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_durations[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "a54932ca-033e-4686-91fe-bcddd0e37582", + "metadata": {}, + "outputs": [], + "source": [ + "sorted_all_durations = sorted(all_durations)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "id": "3476b298-0fe6-4679-ba74-72a6e84c2a4c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 1, 1]" + ] + }, + 
"execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted_all_durations[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "f1d85467-43de-4c1e-9ede-81f4ab8fbea5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[4981, 6117, 6752]" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted_all_durations[-3:]" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "9ac3916f-77b4-47f1-925c-db595de7b037", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1084" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "round(statistics.mean(sorted_all_durations))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "626bf1b1-92cb-45e8-8a0a-9d6db062df01", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "585fe050-7a73-47f3-bf89-b7986ad9f460", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "451" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "statistics.median(sorted_all_durations)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "885d14b0-93f4-4d52-ad65-3d227118e4ca", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "655a73b1-ee3c-481a-9757-c29a66774d09", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(('Edgardo Ramos', 3405), 37),\n", + " (('Waverly David Crenshaw Jr.', 3603), 34),\n", + " (('Sarah Elizabeth Pitlyk', 7465646), 30),\n", + " (('P. 
Kevin Castel', 3029), 23),\n", + " (('Sara Elizabeth Lioi', 3140), 23)]" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted_case_counts_by_judges[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2edeb2cf-d11f-4c7f-b359-96dfc33e39f3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "f6073a89-cc82-4400-bd2d-bde0894e5f60", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------\n", + "judge name: Edgardo Ramos\n", + "total num cases: 37\n", + "average duration: 133\n", + "median duration: 95\n", + "--------------------\n", + "judge name: Waverly David Crenshaw Jr.\n", + "total num cases: 34\n", + "average duration: 280\n", + "median duration: 307.0\n", + "--------------------\n", + "judge name: Sarah Elizabeth Pitlyk\n", + "total num cases: 30\n", + "average duration: 1146\n", + "median duration: 1178.0\n", + "--------------------\n", + "judge name: P. 
Kevin Castel\n", + "total num cases: 23\n", + "average duration: 823\n", + "median duration: 912\n", + "--------------------\n", + "judge name: Sara Elizabeth Lioi\n", + "total num cases: 23\n", + "average duration: 65\n", + "median duration: 71\n" + ] + } + ], + "source": [ + "for j in sorted_case_counts_by_judges[:5]:\n", + " judge_cases = cases_by_judge[j[0]]\n", + " judge_durations = [c['duration'].days for c in judge_cases]\n", + " print('--------------------')\n", + " print(f'judge name: {j[0][0]}')\n", + " print(f'total num cases: {j[1]}')\n", + " print(f'average duration: {round(statistics.mean(judge_durations))}')\n", + " print(f'median duration: {statistics.median(judge_durations)}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9de2e16-b774-41d1-91e0-270b592c385f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9212a3e4-b281-4089-b053-b3305c008d35", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f28acd2-7ea9-419e-bcb4-5175f0570f0e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a08c2a8d-6996-4a0a-b10c-afcef23d4799", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7d75145-ac26-4119-ad41-e5315579093f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "618e7449-fe8c-4bf3-85bb-85d4646b1294", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba4cb4af-3f34-4b16-bc8c-f2da794920b6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79777192-6cf6-4139-9d66-17b6b7594c7d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "91e2580a-702c-4491-ae5d-49309fef2174", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7cf2dfa-2344-4ac9-bf24-abcded22ea61", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a20cd68-dae1-4408-aba9-6b7dcb27a47e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1ff2bb3-3595-489f-91c6-0d24d756f3e3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f510491-a5a9-4153-aa77-d458b52e9858", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04366b77-aa4c-4805-b4d1-1049def3457b", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "280fe618-01ef-4763-b720-c5b2f40a12fa", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2189d583-8837-40dd-a250-8647bd9d2a11", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "270b1183-8bce-440d-9b9a-f1a99e2f09bb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "319a6849-254f-4b6a-b220-8d2cb70b70e0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cbd1d69-08c3-420d-b0fb-dae0da8096ca", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43b7a5ee-047a-486d-ba08-f3e9b309de59", + "metadata": {}, "outputs": [], "source": [] } From 3b99a13dba58eba5f2fa3a77fa8ef38f5a74e347 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Tue, 6 May 2025 16:31:08 -0600 Subject: [PATCH 11/21] 
[#15] Finish most of spreadsheet example --- docs/spreadsheet_example.rst | 112 +++++++- examples/spreadsheet_example.ipynb | 434 +++++++++++++++++++++++------ 2 files changed, 452 insertions(+), 94 deletions(-) diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index ee114c7..028e3d6 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -2,13 +2,13 @@ Adding search results to a spreadsheet ====================================== -In the :doc:`quickstart `, we looked for an individual case. In this example, we'll look at a group of cases to see if we can glean any interesting information about the group of cases as a whole. +In the :doc:`quickstart`, we looked for an individual case. In this example, we'll look at a group of cases to see if we can glean any interesting information about the group of cases as a whole. -For this example, we'll look for Antitrust cases terminated in 2024, do some light analysis, and then the cases and analysis to a spreadsheet. +For this example, we'll look at Antitrust cases terminated in 2024, do some light analysis, and add the cases to a spreadsheet. A jupyter notebook with this code can be found in `the examples folder `_ -In the quickstart, you saw how we created an API client object using a context manager. We then used the API client to create an object with access to the Federal District case API endpoints: +In the :doc:`quickstart`, you saw how we created an API client object using a context manager. We then used the API client to create an object with access to the Federal District case API endpoints: .. code-block:: python @@ -117,6 +117,8 @@ If we import the ``statistics`` library, we can check out the mean and median va .. code-block:: python + import statistics + round(statistics.mean(sorted_all_durations)) 1084 @@ -181,10 +183,112 @@ To see the duration stats for the top five judges: median duration: 71 - +Now let's add the cases to a spreadsheet.
+ +Since we have focused on judges until now, for the spreadsheet let's look at something different: which law firms appeared in these cases, and the parties and roles they represented. + +First, let's create the rows. We'll first determine which columns we want and then add that info for each row. + +.. code-block:: python + + column_names = [ + 'case id', + 'case number', + 'case title', + 'law_firm', + 'law_firm_id', + 'party', + 'party_id', + 'role' + ] + + rows = [] + rows.append(column_names) + + for c in case_data: + for law_firm in c.law_firms: + for party_id in law_firm.client_party_ids: + party = parties_by_id_by_case_id[c.district_case_id][party_id] + rows.append( + ( + c.district_case_id, + c.case_no, + c.title, + (law_firm.name, law_firm.law_firm_id), + (party.name, party.party_id), + party.role + ) + ) + +And checking a few of them, including the header to make sure we added it. +.. code-block:: python + + len(rows) + 19084 + + rows[0] + ['case id', + 'case number', + 'case title', + 'law_firm', + 'law_firm_id', + 'party', + 'party_id', + 'role'] + + rows[1] + (97091, + '2:06-cv-01833-MSG', + 'VISTA HEALTHPLAN, INC. v. CEPHALON, INC. et al', + 'Kessler Topaz Meltzer & Check', + 27, + 'SHIRLEY PANEBIANO', + 257121, + 'Plaintiff') + + rows[10000] + (2005150350, + '3:20-cv-05792-JD', + 'In re Google Play Developer Antitrust Litigation', + "O'Melveny & Myers", + 227639559, + 'Google Asia Pacific PTE. Limited', + 52824280, + 'Defendant') + + rows[-1] + (2034774512, + '3:24-cv-09118-VC', + 'Kushner et al v. Chunghwa Picture Tubes, Ltd. et al', + 'Goldman Scarlato & Penny', + 15211344, + 'Barry Kushner', + 10805, + 'Plaintiff') + + +Now let's add these rows to a spreadsheet. We could have created this list directly in the previous step, but it's useful to make sure things look good first in a readable way. + +For this example we'll be using `openpyxl `_, which you can install using ``pip install openpyxl``. + + +..
code-block:: python + + from openpyxl import Workbook + + wb = Workbook() + ws = wb.active + + for r in rows: + ws.append(r) + + wb.save("antitrust_terminated_2024_law_firms.xlsx") + wb.close() + +The rows are then saved to the spreadsheet in your working directory. diff --git a/examples/spreadsheet_example.ipynb b/examples/spreadsheet_example.ipynb index 6a8b279..a08e671 100644 --- a/examples/spreadsheet_example.ipynb +++ b/examples/spreadsheet_example.ipynb @@ -455,14 +455,6 @@ "round(statistics.mean(sorted_all_durations))" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "626bf1b1-92cb-45e8-8a0a-9d6db062df01", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 105, @@ -484,14 +476,6 @@ "statistics.median(sorted_all_durations)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "885d14b0-93f4-4d52-ad65-3d227118e4ca", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 99, @@ -517,14 +501,6 @@ "sorted_case_counts_by_judges[:5]" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "2edeb2cf-d11f-4c7f-b359-96dfc33e39f3", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 104, @@ -576,163 +552,441 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 108, "id": "c9de2e16-b774-41d1-91e0-270b592c385f", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "[LawFirm(name='Kessler Topaz Meltzer & Check', law_firm_id=27, client_party_ids=[257121, 52552843, 231694, 37904, 23356662, 20047290, 24917852, 37648157]),\n", + " LawFirm(name='Hagens Berman Sobol Shapiro', law_firm_id=30, client_party_ids=[231694]),\n", + " LawFirm(name='Berger Montague', law_firm_id=51, client_party_ids=[231694]),\n", + " LawFirm(name='Faruqi & Faruqi', law_firm_id=54, client_party_ids=[231694]),\n", + " LawFirm(name='Reed Smith', law_firm_id=433, 
client_party_ids=[27, 20020989]),\n", + " LawFirm(name='Department of Justice', law_firm_id=604, client_party_ids=[20020989]),\n", + " LawFirm(name='WilmerHale', law_firm_id=853, client_party_ids=[20036179]),\n", + " LawFirm(name='Stevens & Lee', law_firm_id=1280, client_party_ids=[2993, 1087]),\n", + " LawFirm(name='Venable', law_firm_id=1909, client_party_ids=[20030256, 2993, 20036179, 20027124, 1855, 27, 20020989, 1087]),\n", + " LawFirm(name='State of Pennsylvania', law_firm_id=2338, client_party_ids=[27, 20020989]),\n", + " LawFirm(name='Rawlings & Associates', law_firm_id=3008, client_party_ids=[231694]),\n", + " LawFirm(name='Mintz, Levin, Cohn, Ferris, Glovsky and Popeo', law_firm_id=4142, client_party_ids=[2993, 1087]),\n", + " LawFirm(name='Cravath, Swaine & Moore', law_firm_id=8914, client_party_ids=[27, 20020989, 1855]),\n", + " LawFirm(name='Volpe & Koenig', law_firm_id=18520, client_party_ids=[20030256, 20027124]),\n", + " LawFirm(name='Kirkland & Ellis', law_firm_id=20983, client_party_ids=[20030256, 2993, 20036179, 20027124, 1855, 27, 20020989, 1087]),\n", + " LawFirm(name='Eckert Seamans Cherin & Mellott', law_firm_id=27984, client_party_ids=[20036179]),\n", + " LawFirm(name='Willkie Farr & Gallagher', law_firm_id=31981, client_party_ids=[20030256, 2993, 1087, 20027124, 20020989, 1855]),\n", + " LawFirm(name='Robinson & Cole', law_firm_id=33881, client_party_ids=[27, 20020989]),\n", + " LawFirm(name='Finkelstein Thompson', law_firm_id=35890, client_party_ids=[37904, 23356662, 20047290, 24917852, 37648157]),\n", + " LawFirm(name='Lewis Brisbois Bisgaard & Smith', law_firm_id=54662, client_party_ids=[27, 20020989]),\n", + " LawFirm(name=\"Conrad, O'Brien, Gellman & Rohn\", law_firm_id=99445, client_party_ids=[20036179]),\n", + " LawFirm(name='Klafter Olsen & Lesser', law_firm_id=230148, client_party_ids=[20047290]),\n", + " LawFirm(name='Criden & Love', law_firm_id=303343, client_party_ids=[52552843]),\n", + " LawFirm(name='Hangley Aronchick Segal 
Pudlin & Schiller', law_firm_id=449598, client_party_ids=[9750]),\n", + " LawFirm(name='Armstrong Teasdale', law_firm_id=479700, client_party_ids=[1855]),\n", + " LawFirm(name='City of Philadelphia, Pennsylvania', law_firm_id=797446, client_party_ids=[27, 20020989]),\n", + " LawFirm(name='Hanzman Criden Love', law_firm_id=1241171, client_party_ids=[37904, 23356662, 20047290, 24917852, 37648157]),\n", + " LawFirm(name='Finkelstein & Krinsk', law_firm_id=1356290, client_party_ids=[34912145]),\n", + " LawFirm(name='Akin Gump Strauss Hauer & Feld', law_firm_id=1948224, client_party_ids=[27, 20020989, 1855]),\n", + " LawFirm(name='Hilliard & Shadowen', law_firm_id=2605659, client_party_ids=[9750]),\n", + " LawFirm(name='Roddy Klein & Ryan', law_firm_id=12953381, client_party_ids=[20036179]),\n", + " LawFirm(name='Kirschner & Gartrell', law_firm_id=14093126, client_party_ids=[20047290]),\n", + " LawFirm(name='Spector Roseman & Kodroff', law_firm_id=42582324, client_party_ids=[257121, 52552843, 37904, 23356662, 20047290, 24917852, 37648157]),\n", + " LawFirm(name='MONTGOMERY McCRACKEN WALKER & RHOADS', law_firm_id=45200100, client_party_ids=[1855]),\n", + " LawFirm(name='Harkins Cunningham', law_firm_id=62651519, client_party_ids=[20030256, 20027124]),\n", + " LawFirm(name='White and Williams', law_firm_id=76758445, client_party_ids=[20030256, 2993, 20036179, 20027124, 1855, 20020989, 1087]),\n", + " LawFirm(name='Law Offices of Robert W. 
Sink', law_firm_id=93515719, client_party_ids=[37904, 23356662, 20047290, 24917852, 37648157]),\n", + " LawFirm(name='Fox Rothschild', law_firm_id=226013496, client_party_ids=[20036179])]" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "case_data[0].law_firms" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "9212a3e4-b281-4089-b053-b3305c008d35", + "execution_count": 109, + "id": "825628d6-1cdf-4ff4-b86b-f73bbd41b5a6", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "[Party(name='Debra Langan', party_id=24917852, role='Plaintiff'),\n", + " Party(name='Pennsylvania Turnpike Commission', party_id=23356662, role='Plaintiff'),\n", + " Party(name='SHIRLEY PANEBIANO', party_id=257121, role='Plaintiff'),\n", + " Party(name='AvMed, Inc.', party_id=231694, role='Plaintiff'),\n", + " Party(name='Jeffrey R. Krinsk', party_id=34912145, role='Plaintiff'),\n", + " Party(name='District Council 37 Health and Security Plan', party_id=37648157, role='Plaintiff'),\n", + " Party(name='Eckerd Corporation', party_id=9750, role='Defendant'),\n", + " Party(name='Teva Pharmaceutical Industries Ltd.', party_id=2993, role='Defendant'),\n", + " Party(name='Teva Pharmaceuticals USA, Inc.', party_id=1087, role='Defendant'),\n", + " Party(name='Mylan Pharmaceuticals, Inc.', party_id=27, role='Defendant'),\n", + " Party(name='Barr Laboratories, Inc.', party_id=1855, role='Defendant'),\n", + " Party(name='End Payor Class Plaintiffs', party_id=52552843, role='Plaintiff'),\n", + " Party(name='Ranbaxy Laboratories Limited', party_id=20027124, role='Defendant'),\n", + " Party(name='Vista Health Plan, Inc.', party_id=20047290, role='Plaintiff'),\n", + " Party(name='Ranbaxy Pharmaceuticals Inc.', party_id=20030256, role='Defendant'),\n", + " Party(name='Cephalon, Inc.', party_id=20036179, role='Defendant'),\n", + " Party(name='Mylan Laboratories, Inc.', party_id=20020989, 
role='Defendant'),\n", + " Party(name='Pennsylvania Employees Benefit Trust Fund', party_id=37904, role='Plaintiff')]" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "case_data[0].parties" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "4f28acd2-7ea9-419e-bcb4-5175f0570f0e", + "execution_count": 110, + "id": "8ac40da8-cd31-49a3-bdf0-16cbf08a77bb", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "parties_by_id_by_case_id = {}" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "a08c2a8d-6996-4a0a-b10c-afcef23d4799", + "execution_count": 113, + "id": "c4a47c85-6044-45c8-9e79-e3d21f62402d", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "for c in case_data:\n", + " parties_by_id_by_case_id[c.district_case_id] = {}\n", + " for p in c.parties:\n", + " parties_by_id_by_case_id[c.district_case_id][p.party_id] = p" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "c7d75145-ac26-4119-ad41-e5315579093f", + "execution_count": 114, + "id": "feed8a02-9637-4acd-ba53-99043debbd1f", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "[97091, 48907, 2000009555, 2000026715, 2000028620]" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(parties_by_id_by_case_id.keys())[:5]" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "618e7449-fe8c-4bf3-85bb-85d4646b1294", + "execution_count": 115, + "id": "baa34eab-f767-4cb9-818a-a543e031e57b", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "{24917852: Party(name='Debra Langan', party_id=24917852, role='Plaintiff'),\n", + " 23356662: Party(name='Pennsylvania Turnpike Commission', party_id=23356662, role='Plaintiff'),\n", + " 257121: Party(name='SHIRLEY PANEBIANO', party_id=257121, role='Plaintiff'),\n", + " 231694: 
Party(name='AvMed, Inc.', party_id=231694, role='Plaintiff'),\n", + " 34912145: Party(name='Jeffrey R. Krinsk', party_id=34912145, role='Plaintiff'),\n", + " 37648157: Party(name='District Council 37 Health and Security Plan', party_id=37648157, role='Plaintiff'),\n", + " 9750: Party(name='Eckerd Corporation', party_id=9750, role='Defendant'),\n", + " 2993: Party(name='Teva Pharmaceutical Industries Ltd.', party_id=2993, role='Defendant'),\n", + " 1087: Party(name='Teva Pharmaceuticals USA, Inc.', party_id=1087, role='Defendant'),\n", + " 27: Party(name='Mylan Pharmaceuticals, Inc.', party_id=27, role='Defendant'),\n", + " 1855: Party(name='Barr Laboratories, Inc.', party_id=1855, role='Defendant'),\n", + " 52552843: Party(name='End Payor Class Plaintiffs', party_id=52552843, role='Plaintiff'),\n", + " 20027124: Party(name='Ranbaxy Laboratories Limited', party_id=20027124, role='Defendant'),\n", + " 20047290: Party(name='Vista Health Plan, Inc.', party_id=20047290, role='Plaintiff'),\n", + " 20030256: Party(name='Ranbaxy Pharmaceuticals Inc.', party_id=20030256, role='Defendant'),\n", + " 20036179: Party(name='Cephalon, Inc.', party_id=20036179, role='Defendant'),\n", + " 20020989: Party(name='Mylan Laboratories, Inc.', party_id=20020989, role='Defendant'),\n", + " 37904: Party(name='Pennsylvania Employees Benefit Trust Fund', party_id=37904, role='Plaintiff')}" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parties_by_id_by_case_id[97091]" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "ba4cb4af-3f34-4b16-bc8c-f2da794920b6", + "execution_count": 192, + "id": "b6068f2d-d06d-4ad3-8d4d-3a3d1dd9bab6", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "column_names = ['case id', 'case number', 'case title', 'law_firm', 'law_firm_id', 'party', 'party_id', 'role']" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "79777192-6cf6-4139-9d66-17b6b7594c7d", + 
"execution_count": 193, + "id": "64bd7dec-64a6-4c60-84e7-f4e96d056bc9", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "rows = []" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "91e2580a-702c-4491-ae5d-49309fef2174", + "execution_count": 194, + "id": "34198f78-afaf-47d9-8593-212f3e138fb2", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "rows.append(column_names)" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "a7cf2dfa-2344-4ac9-bf24-abcded22ea61", + "execution_count": 195, + "id": "e8cfcf6c-0c76-460c-8be8-c40185b80a45", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "for c in case_data:\n", + " for law_firm in c.law_firms:\n", + " for party_id in law_firm.client_party_ids:\n", + " party = parties_by_id_by_case_id[c.district_case_id][party_id]\n", + " rows.append(\n", + " (\n", + " c.district_case_id,\n", + " c.case_no,\n", + " c.title,\n", + " law_firm.name,\n", + " law_firm.law_firm_id,\n", + " party.name,\n", + " party.party_id,\n", + " party.role\n", + " )\n", + " )\n", + " " + ] }, { "cell_type": "code", - "execution_count": null, - "id": "3a20cd68-dae1-4408-aba9-6b7dcb27a47e", + "execution_count": 196, + "id": "5fed65b3-31a2-4ec4-9e4b-2fe3435cded2", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "19084" + ] + }, + "execution_count": 196, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(rows)" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "e1ff2bb3-3595-489f-91c6-0d24d756f3e3", + "execution_count": 197, + "id": "c50fbd93-0f85-4d52-bee9-4759bf774e40", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "['case id',\n", + " 'case number',\n", + " 'case title',\n", + " 'law_firm',\n", + " 'law_firm_id',\n", + " 'party',\n", + " 'party_id',\n", + " 'role']" + ] + }, + "execution_count": 197, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "rows[0]" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "4f510491-a5a9-4153-aa77-d458b52e9858", + "execution_count": 198, + "id": "a04480d5-2182-4693-a6ca-ccb8328ef5de", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "(97091,\n", + " '2:06-cv-01833-MSG',\n", + " 'VISTA HEALTHPLAN, INC. v. CEPHALON, INC. et al',\n", + " 'Kessler Topaz Meltzer & Check',\n", + " 27,\n", + " 'SHIRLEY PANEBIANO',\n", + " 257121,\n", + " 'Plaintiff')" + ] + }, + "execution_count": 198, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rows[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 199, + "id": "053c2b2a-03e5-4c7d-9878-51822201b376", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2005150350,\n", + " '3:20-cv-05792-JD',\n", + " 'In re Google Play Developer Antitrust Litigation',\n", + " \"O'Melveny & Myers\",\n", + " 227639559,\n", + " 'Google Asia Pacific PTE. Limited',\n", + " 52824280,\n", + " 'Defendant')" + ] + }, + "execution_count": 199, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rows[10000]" + ] + }, + { + "cell_type": "code", + "execution_count": 201, + "id": "4a93247c-fd30-416a-9292-c9ac8ef79088", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2034774512,\n", + " '3:24-cv-09118-VC',\n", + " 'Kushner et al v. Chunghwa Picture Tubes, Ltd. 
et al',\n", + " 'Goldman Scarlato & Penny',\n", + " 15211344,\n", + " 'Barry Kushner',\n", + " 10805,\n", + " 'Plaintiff')" + ] + }, + "execution_count": 201, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rows[-1]" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "04366b77-aa4c-4805-b4d1-1049def3457b", + "execution_count": 140, + "id": "f613953c-3284-42f2-9ef7-111f507f6f01", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from openpyxl import Workbook" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "280fe618-01ef-4763-b720-c5b2f40a12fa", + "execution_count": 202, + "id": "bf3996a5-6605-45b9-a250-a092b583e756", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "wb.close()" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "2189d583-8837-40dd-a250-8647bd9d2a11", + "execution_count": 203, + "id": "cd0f289b-cb72-4f93-8ec9-347fd3513293", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "wb = Workbook()" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "270b1183-8bce-440d-9b9a-f1a99e2f09bb", + "execution_count": 204, + "id": "d92307cb-c9d7-49a1-812d-05e68b09b448", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "ws = wb.active" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "319a6849-254f-4b6a-b220-8d2cb70b70e0", + "execution_count": 205, + "id": "038d3234-2147-497d-8c49-ee1b4f6e2913", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "for r in rows:\n", + " ws.append(r)" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "9cbd1d69-08c3-420d-b0fb-dae0da8096ca", + "execution_count": 206, + "id": "662327a4-07d1-4de2-84ac-38935232d567", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "wb.save(\"antitrust_terminated_2024_law_firms.xlsx\")" + ] }, { "cell_type": "code", - "execution_count": null, - "id": "43b7a5ee-047a-486d-ba08-f3e9b309de59", + "execution_count": 207, + 
"id": "319a6849-254f-4b6a-b220-8d2cb70b70e0", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "wb.close()" + ] } ], "metadata": { From b7e53688b4de53489b37ab0c88c410527a859f29 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Tue, 6 May 2025 16:38:33 -0600 Subject: [PATCH 12/21] [#15] fix typos --- docs/spreadsheet_example.rst | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index 028e3d6..84da4f4 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -142,7 +142,7 @@ First let's sort judges by case counts: ) -To see the duration stats for the top five judges: +Now let's check duration stats for the top five judges: .. code-block:: python @@ -214,13 +214,15 @@ First, lets create the rows. We'll first determine which columns we want and the c.district_case_id, c.case_no, c.title, - (law_firm.name, law_firm.law_firm_id), - (party.name, party.party_id), + law_firm.name, + law_firm.law_firm_id, + party.name, + party.party_id, party.role + ) ) - ) - -And checking a few of them, including the header to make sure we added it. + +Now we'll spot check a few of them, including the header to make sure we added it. .. code-block:: python @@ -269,7 +271,7 @@ And checking a few of them, including the header to make sure we added it. 'Plaintiff') -Now let's add these rows to a spreadsheet. We could have created this list directly in the previous step, but it's useful to make sure things look good first in a readable way. +Now let's add these rows to a spreadsheet. For this example we'll be using `openpyxl `_, which you can install using ``pip install openpyxl``. 
@@ -290,8 +292,7 @@ For this example we'll be using `openpyxl Date: Wed, 7 May 2025 15:52:49 -0600 Subject: [PATCH 13/21] [#15] Remove api client model customization to reduce maintenance overhead; adjust docs --- custom_templates/api_client.mustache | 815 --------------------------- docs/quickstart.rst | 20 +- docs/spreadsheet_example.rst | 11 +- 3 files changed, 15 insertions(+), 831 deletions(-) delete mode 100644 custom_templates/api_client.mustache diff --git a/custom_templates/api_client.mustache b/custom_templates/api_client.mustache deleted file mode 100644 index 15990d0..0000000 --- a/custom_templates/api_client.mustache +++ /dev/null @@ -1,815 +0,0 @@ -# coding: utf-8 - -{{>partial_header}} - -import datetime -from dateutil.parser import parse -from enum import Enum -import decimal -import json -import mimetypes -import os -import re -import tempfile - -from urllib.parse import quote -from typing import Tuple, Optional, List, Dict, Union -from pydantic import SecretStr -{{#tornado}} -import tornado.gen -{{/tornado}} - -from {{packageName}}.configuration import Configuration -from {{packageName}}.api_response import ApiResponse, T as ApiResponseT -import {{modelPackage}} -from {{packageName}} import rest -from {{packageName}}.exceptions import ( - ApiValueError, - ApiException, - BadRequestException, - UnauthorizedException, - ForbiddenException, - NotFoundException, - ServiceException -) - -RequestSerialized = Tuple[str, str, Dict[str, str], Optional[str], List[str]] - -class ApiClient: - """Generic API client for OpenAPI client library builds. - - OpenAPI generic API client. This client handles the client- - server communication, and is invariant across implementations. Specifics of - the methods and models for each application are generated from the OpenAPI - templates. - - :param configuration: .Configuration object for this client - :param header_name: a header to pass when making calls to the API. 
- :param header_value: a header value to pass when making calls to - the API. - :param cookie: a cookie to include in the header when making calls - to the API - """ - - PRIMITIVE_TYPES = (float, bool, bytes, str, int) - NATIVE_TYPES_MAPPING = { - 'int': int, - 'long': int, # TODO remove as only py3 is supported? - 'float': float, - 'str': str, - 'bool': bool, - 'date': datetime.date, - 'datetime': datetime.datetime, - 'decimal': decimal.Decimal, - 'object': object, - } - _pool = None - - def __init__( - self, - configuration=None, - header_name=None, - header_value=None, - cookie=None - ) -> None: - # use default configuration if none is provided - if configuration is None: - configuration = Configuration.get_default() - self.configuration = configuration - - self.rest_client = rest.RESTClientObject(configuration) - self.default_headers = {} - if header_name is not None: - self.default_headers[header_name] = header_value - self.cookie = cookie - # Set default User-Agent. - self.user_agent = '{{{httpUserAgent}}}{{^httpUserAgent}}OpenAPI-Generator/{{{packageVersion}}}/python{{/httpUserAgent}}' - self.client_side_validation = configuration.client_side_validation - -{{#asyncio}} - async def __aenter__(self): - return self - - async def __aexit__(self, exc_type, exc_value, traceback): - await self.close() - - async def close(self): - await self.rest_client.close() -{{/asyncio}} -{{^asyncio}} - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.rest_client.pool_manager.clear() - self.configuration = None -{{/asyncio}} - - @property - def user_agent(self): - """User agent for this API client""" - return self.default_headers['User-Agent'] - - @user_agent.setter - def user_agent(self, value): - self.default_headers['User-Agent'] = value - - def set_default_header(self, header_name, header_value): - self.default_headers[header_name] = header_value - - - _default = None - - @classmethod - def get_default(cls): - """Return new 
instance of ApiClient. - - This method returns newly created, based on default constructor, - object of ApiClient class or returns a copy of default - ApiClient. - - :return: The ApiClient object. - """ - if cls._default is None: - cls._default = ApiClient() - return cls._default - - @classmethod - def set_default(cls, default): - """Set default instance of ApiClient. - - It stores default ApiClient. - - :param default: object of ApiClient. - """ - cls._default = default - - def param_serialize( - self, - method, - resource_path, - path_params=None, - query_params=None, - header_params=None, - body=None, - post_params=None, - files=None, auth_settings=None, - collection_formats=None, - _host=None, - _request_auth=None - ) -> RequestSerialized: - - """Builds the HTTP request params needed by the request. - :param method: Method to call. - :param resource_path: Path to method endpoint. - :param path_params: Path parameters in the url. - :param query_params: Query parameters in the url. - :param header_params: Header parameters to be - placed in the request header. - :param body: Request body. - :param post_params dict: Request post form parameters, - for `application/x-www-form-urlencoded`, `multipart/form-data`. - :param auth_settings list: Auth Settings names for the request. - :param files dict: key -> filename, value -> filepath, - for `multipart/form-data`. - :param collection_formats: dict of collection formats for path, query, - header, and post parameters. - :param _request_auth: set to override the auth_settings for an a single - request; this effectively ignores the authentication - in the spec for a single request. 
- :return: tuple of form (path, http_method, query_params, header_params, - body, post_params, files) - """ - - config = self.configuration - - # header parameters - header_params = header_params or {} - header_params.update(self.default_headers) - if self.cookie: - header_params['Cookie'] = self.cookie - if header_params: - header_params = self.sanitize_for_serialization(header_params) - header_params = dict( - self.parameters_to_tuples(header_params,collection_formats) - ) - - # path parameters - if path_params: - path_params = self.sanitize_for_serialization(path_params) - path_params = self.parameters_to_tuples( - path_params, - collection_formats - ) - for k, v in path_params: - # specified safe chars, encode everything - resource_path = resource_path.replace( - '{%s}' % k, - quote(str(v), safe=config.safe_chars_for_path_param) - ) - - # post parameters - if post_params or files: - post_params = post_params if post_params else [] - post_params = self.sanitize_for_serialization(post_params) - post_params = self.parameters_to_tuples( - post_params, - collection_formats - ) - if files: - post_params.extend(self.files_parameters(files)) - - # auth setting - self.update_params_for_auth( - header_params, - query_params, - auth_settings, - resource_path, - method, - body, - request_auth=_request_auth - ) - - # body - if body: - body = self.sanitize_for_serialization(body) - - # request url - if _host is None or self.configuration.ignore_operation_servers: - url = self.configuration.host + resource_path - else: - # use server/host defined in path or operation instead - url = _host + resource_path - - # query parameters - if query_params: - query_params = self.sanitize_for_serialization(query_params) - url_query = self.parameters_to_url_query( - query_params, - collection_formats - ) - url += "?" 
+ url_query - - return method, url, header_params, body, post_params - - - {{#tornado}} - @tornado.gen.coroutine - {{/tornado}} - {{#asyncio}}async {{/asyncio}}def call_api( - self, - method, - url, - header_params=None, - body=None, - post_params=None, - _request_timeout=None - ) -> rest.RESTResponse: - """Makes the HTTP request (synchronous) - :param method: Method to call. - :param url: Path to method endpoint. - :param header_params: Header parameters to be - placed in the request header. - :param body: Request body. - :param post_params dict: Request post form parameters, - for `application/x-www-form-urlencoded`, `multipart/form-data`. - :param _request_timeout: timeout setting for this request. - :return: RESTResponse - """ - - try: - # perform request and return response - response_data = {{#asyncio}}await {{/asyncio}}{{#tornado}}yield {{/tornado}}self.rest_client.request( - method, url, - headers=header_params, - body=body, post_params=post_params, - _request_timeout=_request_timeout - ) - - except ApiException as e: - raise e - - return response_data - - def response_deserialize( - self, - response_data: rest.RESTResponse, - response_types_map: Optional[Dict[str, ApiResponseT]]=None - ) -> ApiResponse[ApiResponseT]: - """Deserializes response into an object. - :param response_data: RESTResponse object to be deserialized. - :param response_types_map: dict of response types. - :return: ApiResponse - """ - - msg = "RESTResponse.read() must be called before passing it to response_deserialize()" - assert response_data.data is not None, msg - - response_type = response_types_map.get(str(response_data.status), None) - if not response_type and isinstance(response_data.status, int) and 100 <= response_data.status <= 599: - # if not found, look for '1XX', '2XX', etc. 
- response_type = response_types_map.get(str(response_data.status)[0] + "XX", None) - - # deserialize response data - response_text = None - return_data = None - try: - if response_type == "bytearray": - return_data = response_data.data - elif response_type == "file": - return_data = self.__deserialize_file(response_data) - elif response_type is not None: - match = None - content_type = response_data.getheader('content-type') - if content_type is not None: - match = re.search(r"charset=([a-zA-Z\-\d]+)[\s;]?", content_type) - encoding = match.group(1) if match else "utf-8" - response_text = response_data.data.decode(encoding) - return_data = self.deserialize(response_text, response_type, content_type) - finally: - if not 200 <= response_data.status <= 299: - raise ApiException.from_response( - http_resp=response_data, - body=response_text, - data=return_data, - ) - - return ApiResponse( - status_code = response_data.status, - data = return_data, - headers = response_data.getheaders(), - raw_data = response_data.data - ) - - def sanitize_for_serialization(self, obj): - """Builds a JSON POST object. - - If obj is None, return None. - If obj is SecretStr, return obj.get_secret_value() - If obj is str, int, long, float, bool, return directly. - If obj is datetime.datetime, datetime.date - convert to string in iso8601 format. - If obj is decimal.Decimal return string representation. - If obj is list, sanitize each element in the list. - If obj is dict, return the dict. - If obj is OpenAPI model, return the properties dict. - - :param obj: The data to serialize. - :return: The serialized form of data. 
- """ - if obj is None: - return None - elif isinstance(obj, Enum): - return obj.value - elif isinstance(obj, SecretStr): - return obj.get_secret_value() - elif isinstance(obj, self.PRIMITIVE_TYPES): - return obj - elif isinstance(obj, list): - return [ - self.sanitize_for_serialization(sub_obj) for sub_obj in obj - ] - elif isinstance(obj, tuple): - return tuple( - self.sanitize_for_serialization(sub_obj) for sub_obj in obj - ) - elif isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - elif isinstance(obj, decimal.Decimal): - return str(obj) - - elif isinstance(obj, dict): - obj_dict = obj - else: - # Convert model obj to dict except - # attributes `openapi_types`, `attribute_map` - # and attributes which value is not None. - # Convert attribute name to json key in - # model definition for request. - if hasattr(obj, 'to_dict') and callable(getattr(obj, 'to_dict')): - obj_dict = obj.to_dict() - else: - obj_dict = obj.__dict__ - - return { - key: self.sanitize_for_serialization(val) - for key, val in obj_dict.items() - } - - def deserialize(self, response_text: str, response_type: str, content_type: Optional[str]): - """Deserializes response into an object. - - :param response: RESTResponse object to be deserialized. - :param response_type: class literal for - deserialized object, or string of class name. - :param content_type: content type of response. - - :return: deserialized object. 
- """ - - # fetch data from response object - if content_type is None: - try: - data = json.loads(response_text) - except ValueError: - data = response_text - elif re.match(r'^application/(json|[\w!#$&.+-^_]+\+json)\s*(;|$)', content_type, re.IGNORECASE): - if response_text == "": - data = "" - else: - data = json.loads(response_text) - elif re.match(r'^text\/[a-z.+-]+\s*(;|$)', content_type, re.IGNORECASE): - data = response_text - else: - raise ApiException( - status=0, - reason="Unsupported content type: {0}".format(content_type) - ) - - return self.__deserialize(data, response_type) - - def __deserialize(self, data, klass): - """Deserializes dict, list, str into an object. - - :param data: dict, list or str. - :param klass: class literal, or string of class name. - - :return: object. - """ - if data is None: - return None - - if isinstance(klass, str): - if klass.startswith('List['): - m = re.match(r'List\[(.*)]', klass) - assert m is not None, "Malformed List type definition" - sub_kls = m.group(1) - return [self.__deserialize(sub_data, sub_kls) - for sub_data in data] - - if klass.startswith('Dict['): - m = re.match(r'Dict\[([^,]*), (.*)]', klass) - assert m is not None, "Malformed Dict type definition" - sub_kls = m.group(2) - return {k: self.__deserialize(v, sub_kls) - for k, v in data.items()} - - # convert str to class - if klass in self.NATIVE_TYPES_MAPPING: - klass = self.NATIVE_TYPES_MAPPING[klass] - else: - klass = getattr({{modelPackage}}, klass) - - if klass in self.PRIMITIVE_TYPES: - return self.__deserialize_primitive(data, klass) - elif klass == object: - return self.__deserialize_object(data) - elif klass == datetime.date: - return self.__deserialize_date(data) - elif klass == datetime.datetime: - return self.__deserialize_datetime(data) - elif klass == decimal.Decimal: - return decimal.Decimal(data) - elif issubclass(klass, Enum): - return self.__deserialize_enum(data, klass) - else: - return self.__deserialize_model(data, klass) - - def 
parameters_to_tuples(self, params, collection_formats): - """Get parameters as list of tuples, formatting collections. - - :param params: Parameters as dict or list of two-tuples - :param dict collection_formats: Parameter collection formats - :return: Parameters as list of tuples, collections formatted - """ - new_params: List[Tuple[str, str]] = [] - if collection_formats is None: - collection_formats = {} - for k, v in params.items() if isinstance(params, dict) else params: - if k in collection_formats: - collection_format = collection_formats[k] - if collection_format == 'multi': - new_params.extend((k, value) for value in v) - else: - if collection_format == 'ssv': - delimiter = ' ' - elif collection_format == 'tsv': - delimiter = '\t' - elif collection_format == 'pipes': - delimiter = '|' - else: # csv is the default - delimiter = ',' - new_params.append( - (k, delimiter.join(str(value) for value in v))) - else: - new_params.append((k, v)) - return new_params - - def parameters_to_url_query(self, params, collection_formats): - """Get parameters as list of tuples, formatting collections. - - :param params: Parameters as dict or list of two-tuples - :param dict collection_formats: Parameter collection formats - :return: URL query string (e.g. 
a=Hello%20World&b=123) - """ - new_params: List[Tuple[str, str]] = [] - if collection_formats is None: - collection_formats = {} - for k, v in params.items() if isinstance(params, dict) else params: - if isinstance(v, bool): - v = str(v).lower() - if isinstance(v, (int, float)): - v = str(v) - if isinstance(v, dict): - v = json.dumps(v) - - if k in collection_formats: - collection_format = collection_formats[k] - if collection_format == 'multi': - new_params.extend((k, quote(str(value))) for value in v) - else: - if collection_format == 'ssv': - delimiter = ' ' - elif collection_format == 'tsv': - delimiter = '\t' - elif collection_format == 'pipes': - delimiter = '|' - else: # csv is the default - delimiter = ',' - new_params.append( - (k, delimiter.join(quote(str(value)) for value in v)) - ) - else: - new_params.append((k, quote(str(v)))) - - return "&".join(["=".join(map(str, item)) for item in new_params]) - - def files_parameters( - self, - files: Dict[str, Union[str, bytes, List[str], List[bytes], Tuple[str, bytes]]], - ): - """Builds form parameters. - - :param files: File parameters. - :return: Form parameters with files. - """ - params = [] - for k, v in files.items(): - if isinstance(v, str): - with open(v, 'rb') as f: - filename = os.path.basename(f.name) - filedata = f.read() - elif isinstance(v, bytes): - filename = k - filedata = v - elif isinstance(v, tuple): - filename, filedata = v - elif isinstance(v, list): - for file_param in v: - params.extend(self.files_parameters({k: file_param})) - continue - else: - raise ValueError("Unsupported file value") - mimetype = ( - mimetypes.guess_type(filename)[0] - or 'application/octet-stream' - ) - params.append( - tuple([k, tuple([filename, filedata, mimetype])]) - ) - return params - - def select_header_accept(self, accepts: List[str]) -> Optional[str]: - """Returns `Accept` based on an array of accepts provided. - - :param accepts: List of headers. - :return: Accept (e.g. application/json). 
- """ - if not accepts: - return None - - for accept in accepts: - if re.search('json', accept, re.IGNORECASE): - return accept - - return accepts[0] - - def select_header_content_type(self, content_types): - """Returns `Content-Type` based on an array of content_types provided. - - :param content_types: List of content-types. - :return: Content-Type (e.g. application/json). - """ - if not content_types: - return None - - for content_type in content_types: - if re.search('json', content_type, re.IGNORECASE): - return content_type - - return content_types[0] - - def update_params_for_auth( - self, - headers, - queries, - auth_settings, - resource_path, - method, - body, - request_auth=None - ) -> None: - """Updates header and query params based on authentication setting. - - :param headers: Header parameters dict to be updated. - :param queries: Query parameters tuple list to be updated. - :param auth_settings: Authentication setting identifiers list. - :resource_path: A string representation of the HTTP request resource path. - :method: A string representation of the HTTP request method. - :body: A object representing the body of the HTTP request. - The object type is the return value of sanitize_for_serialization(). - :param request_auth: if set, the provided settings will - override the token in the configuration. - """ - if not auth_settings: - return - - if request_auth: - self._apply_auth_params( - headers, - queries, - resource_path, - method, - body, - request_auth - ) - else: - for auth in auth_settings: - auth_setting = self.configuration.auth_settings().get(auth) - if auth_setting: - self._apply_auth_params( - headers, - queries, - resource_path, - method, - body, - auth_setting - ) - - def _apply_auth_params( - self, - headers, - queries, - resource_path, - method, - body, - auth_setting - ) -> None: - """Updates the request parameters based on a single auth_setting - - :param headers: Header parameters dict to be updated. 
- :param queries: Query parameters tuple list to be updated. - :resource_path: A string representation of the HTTP request resource path. - :method: A string representation of the HTTP request method. - :body: A object representing the body of the HTTP request. - The object type is the return value of sanitize_for_serialization(). - :param auth_setting: auth settings for the endpoint - """ - if auth_setting['in'] == 'cookie': - headers['Cookie'] = auth_setting['value'] - elif auth_setting['in'] == 'header': - if auth_setting['type'] != 'http-signature': - headers[auth_setting['key']] = auth_setting['value'] - {{#hasHttpSignatureMethods}} - else: - # The HTTP signature scheme requires multiple HTTP headers - # that are calculated dynamically. - signing_info = self.configuration.signing_info - auth_headers = signing_info.get_http_signature_headers( - resource_path, method, headers, body, queries) - headers.update(auth_headers) - {{/hasHttpSignatureMethods}} - elif auth_setting['in'] == 'query': - queries.append((auth_setting['key'], auth_setting['value'])) - else: - raise ApiValueError( - 'Authentication token must be in `query` or `header`' - ) - - def __deserialize_file(self, response): - """Deserializes body to file - - Saves response body into a file in a temporary folder, - using the filename from the `Content-Disposition` header if provided. - - handle file downloading - save response body into a tmp file and return the instance - - :param response: RESTResponse. - :return: file path. 
- """ - fd, path = tempfile.mkstemp(dir=self.configuration.temp_folder_path) - os.close(fd) - os.remove(path) - - content_disposition = response.getheader("Content-Disposition") - if content_disposition: - m = re.search( - r'filename=[\'"]?([^\'"\s]+)[\'"]?', - content_disposition - ) - assert m is not None, "Unexpected 'content-disposition' header value" - filename = m.group(1) - path = os.path.join(os.path.dirname(path), filename) - - with open(path, "wb") as f: - f.write(response.data) - - return path - - def __deserialize_primitive(self, data, klass): - """Deserializes string to primitive type. - - :param data: str. - :param klass: class literal. - - :return: int, long, float, str, bool. - """ - try: - return klass(data) - except UnicodeEncodeError: - return str(data) - except TypeError: - return data - - def __deserialize_object(self, value): - """Return an original value. - - :return: object. - """ - return value - - def __deserialize_date(self, string): - """Deserializes string to date. - - :param string: str. - :return: date. - """ - try: - return parse(string).date() - except ImportError: - return string - except ValueError: - raise rest.ApiException( - status=0, - reason="Failed to parse `{0}` as date object".format(string) - ) - - def __deserialize_datetime(self, string): - """Deserializes string to datetime. - - The string should be in iso8601 datetime format. - - :param string: str. - :return: datetime. - """ - try: - return parse(string) - except ImportError: - return string - except ValueError: - raise rest.ApiException( - status=0, - reason=( - "Failed to parse `{0}` as datetime object" - .format(string) - ) - ) - - def __deserialize_enum(self, data, klass): - """Deserializes primitive type to enum. - - :param data: primitive type. - :param klass: class literal. - :return: enum value. 
- """ - try: - return klass(data) - except ValueError: - raise rest.ApiException( - status=0, - reason=( - "Failed to parse `{0}` as `{1}`" - .format(data, klass) - ) - ) - - def __deserialize_model(self, data, klass): - """Deserializes list or dict to model. - - :param data: dict, list. - :param klass: class literal. - :return: model object. - """ - - return klass.from_dict(data) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index cbb3340..562d118 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -9,7 +9,7 @@ To get detailed information on an individual case, we'll need the Lex Machina id In the first code block, you will see the following steps: -#. We configure the client. We'll use a context manager to take advantage of the automatic cleanup it does for us (clearing out the connection pool and the configuration). +#. We configure the client. This requires you first get a bearer token and set its value to the environment variable ``BEARER_TOKEN``. #. Using the configured client, we create an object with access to the Federal District Case endpoints. #. We do a case number search. For this example, we search for the Samsung Electronics v. Sandisk Corporation, case number 9:02-cv-00058-JH. The case number search ignores judge initials at the end of a case number so they've been left out in the search example below. But they could be left in and the results would be the same. We further refine the search by using the optional court filter to limit our search to cases in the U.S. District Court for the Eastern District of Texas. 
@@ -23,11 +23,13 @@ In the first code block, you will see the following steps: access_token=os.environ["BEARER_TOKEN"] ) - with lexmachina.ApiClient(configuration) as api_client: - fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) - case_search_results = fed_dist_case_api_instance.find_district_case_by_number( - case_numbers=["9:02-cv-00058"], court="txed" - ) + api_client = lexmachina.ApiClient(configuration) + + fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) + + case_search_results = fed_dist_case_api_instance.find_district_case_by_number( + case_numbers=["9:02-cv-00058"], court="txed" + ) When we look at ``case_search_results``, this search conveniently returns just one result (if we had left out the court filter, it would have returned three results): @@ -54,9 +56,9 @@ We can see from the output above the Lex Machina id for the case is 88. We will .. code-block:: python - with lexmachina.ApiClient(configuration) as api_client: - fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) - apple_v_sandisk_case = fed_dist_case_api_instance.get_district_case(88) + fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) + + apple_v_sandisk_case = fed_dist_case_api_instance.get_district_case(88) Just for example purposes, here is a sampling of data provided for this individual case. You'll see a number of ids that you can then use to get more information on invidual judges, law firms, attorneys, and parties. diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index 84da4f4..e3a76f1 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -8,7 +8,7 @@ In the :doc:`quickstart`, we looked for an individual case. In this example, we' For this example, we'll look at Antitrust cases terminated in 2024, do some light analysis, and add the cases to a spreadsheet. 
A jupyter notebook with this code can be found in `the examples folder `_ -In the :doc:`quickstart`, you saw how we created an API client object using a context manager. We then used the API client to create an object with access to the Federal District case API endpoints: +In the :doc:`quickstart`, you saw how we created an API client object which we then used to create an object with access to the Federal District case API endpoints: .. code-block:: python @@ -20,14 +20,11 @@ In the :doc:`quickstart`, you saw how we created an API client object using a co access_token=os.environ["BEARER_TOKEN"] ) - with lexmachina.ApiClient(configuration) as api_client: - fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) + api_client = lexmachina.ApiClient(configuration) + fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) -To simplify the code blocks used in this example, we'll simply refer to the ``fed_dist_case_api_instance`` created in the code block above. - - -First, we'll create a query for Antitrust cases terminated in 2024. The API returns results in "pages" with each page showing a maximum of 100 results. If your search returns more than 100 results, you'll need to page through them. +To get started on our cases search, we'll create a query for Antitrust cases terminated in 2024 in the form of a dictionary. The API returns results in "pages" with each page showing a maximum of 100 results. If your search returns more than 100 results, you'll need to page through them. You'll see in the example below we request the first page to start. .. 
code-block:: python From d3ba945bfab7ec7bfb3d9f8b610a8157e9f284ea Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Wed, 7 May 2025 16:19:07 -0600 Subject: [PATCH 14/21] [#15] Use the read the docs sphinx theme, which gives you next and previous buttons --- docs/conf.py | 7 ++----- docs/installation.rst | 3 --- docs/quickstart.rst | 5 ----- docs/spreadsheet_example.rst | 4 +--- 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 951a1d1..607d08d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,6 +5,7 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information +from datetime import datetime, timezone project = "lexmachina-client" copyright = "2025, support@lexmachina.com" @@ -19,9 +20,5 @@ templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] - -# -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output - -html_theme = "alabaster" +html_theme = "sphinx_rtd_theme" html_static_path = ["_static"] diff --git a/docs/installation.rst b/docs/installation.rst index a097882..7b10af7 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -24,6 +24,3 @@ Then set that bearer token in the environment variable: .. code-block:: bash $ export BEARER_TOKEN= - - -Next: :doc:`quickstart` diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 562d118..b1a1b22 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -94,11 +94,6 @@ This example uses the case number search endpoint to find the case id, but there To know your search options, it helps to be familiar with the user-facing Lex Machina website. We recognize that, for new users, the search options are not always immediately obvious. 
If you would like any help using the Lex Machina API, please contact support@lexmachina.com. -Next: :doc:`spreadsheet_example` -Previous: :doc:`installation` - - - diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index e3a76f1..b4cd173 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -21,6 +21,7 @@ In the :doc:`quickstart`, you saw how we created an API client object which we t ) api_client = lexmachina.ApiClient(configuration) + fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) @@ -290,6 +291,3 @@ For this example we'll be using `openpyxl Date: Wed, 7 May 2025 16:23:05 -0600 Subject: [PATCH 15/21] [#15] Regenerate without custom api_client.py template --- src/lexmachina/api_client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lexmachina/api_client.py b/src/lexmachina/api_client.py index a2be914..a8df1e7 100644 --- a/src/lexmachina/api_client.py +++ b/src/lexmachina/api_client.py @@ -97,8 +97,7 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): - self.rest_client.pool_manager.clear() - self.configuration = None + pass @property def user_agent(self): From eba627cebb7a637d5e1421ec2ac55403ba5fc6a6 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Thu, 8 May 2025 15:14:29 -0600 Subject: [PATCH 16/21] [#15] Add readthedocs config and requirements for docs --- .readtheedocs.yaml | 28 +++++++++++ pyproject.toml | 3 +- requirements-docs.txt | 105 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 .readtheedocs.yaml create mode 100644 requirements-docs.txt diff --git a/.readtheedocs.yaml b/.readtheedocs.yaml new file mode 100644 index 0000000..73f10a9 --- /dev/null +++ b/.readtheedocs.yaml @@ -0,0 +1,28 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python 
version, and other tools you might need +build: + os: ubuntu-24.04 + tools: + python: "3.13" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally, but recommended, +# declare the Python requirements required to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +# python: +# install: +# - requirements: docs/requirements.txt +python: + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/pyproject.toml b/pyproject.toml index cbdb268..d763cee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,8 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [project.optional-dependencies] tests = ["pytest >=7.2.1", "pytest-asyncio >= 0.21.0"] -dev = ['ipython >= 8.14.0', 'jupyterlab >= 4.0.0'] +dev = ["ipython >= 8.14.0", "jupyterlab >= 4.0.0"] +docs = ["sphinx>=8.2.3", "sphinx-rtd-theme>=3.0.2"] [tool.hatch.build.targets.wheel] diff --git a/requirements-docs.txt b/requirements-docs.txt new file mode 100644 index 0000000..3ec5c6f --- /dev/null +++ b/requirements-docs.txt @@ -0,0 +1,105 @@ +# +# This file is autogenerated by pip-compile with Python 3.11 +# by the following command: +# +# pip-compile --extra=docs --output-file=requirements-docs.txt pyproject.toml +# +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.11.18 + # via lexmachina-client (pyproject.toml) +aiosignal==1.3.2 + # via aiohttp +alabaster==1.0.0 + # via sphinx +annotated-types==0.7.0 + # via pydantic +attrs==25.3.0 + # via aiohttp +babel==2.17.0 + # via sphinx +certifi==2025.4.26 + # via requests +charset-normalizer==3.4.2 + # via requests +docutils==0.21.2 + # via + # sphinx + # sphinx-rtd-theme +frozenlist==1.6.0 + # via + # aiohttp + # aiosignal +idna==3.10 + # via + # requests + # yarl +imagesize==1.4.1 + # via sphinx +jinja2==3.1.6 + # via sphinx +markupsafe==3.0.2 + # via jinja2 +multidict==6.4.3 + # via + # aiohttp + # yarl 
+packaging==25.0 + # via sphinx +propcache==0.3.1 + # via + # aiohttp + # yarl +pydantic==2.11.4 + # via lexmachina-client (pyproject.toml) +pydantic-core==2.33.2 + # via pydantic +pygments==2.19.1 + # via sphinx +python-dateutil==2.9.0.post0 + # via lexmachina-client (pyproject.toml) +requests==2.32.3 + # via + # lexmachina-client (pyproject.toml) + # sphinx +roman-numerals-py==3.1.0 + # via sphinx +six==1.17.0 + # via python-dateutil +snowballstemmer==3.0.0.1 + # via sphinx +sphinx==8.2.3 + # via + # lexmachina-client (pyproject.toml) + # sphinx-rtd-theme + # sphinxcontrib-jquery +sphinx-rtd-theme==3.0.2 + # via lexmachina-client (pyproject.toml) +sphinxcontrib-applehelp==2.0.0 + # via sphinx +sphinxcontrib-devhelp==2.0.0 + # via sphinx +sphinxcontrib-htmlhelp==2.1.0 + # via sphinx +sphinxcontrib-jquery==4.1 + # via sphinx-rtd-theme +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-qthelp==2.0.0 + # via sphinx +sphinxcontrib-serializinghtml==2.0.0 + # via sphinx +typing-extensions==4.13.2 + # via + # lexmachina-client (pyproject.toml) + # pydantic + # pydantic-core + # typing-inspection +typing-inspection==0.4.0 + # via pydantic +urllib3==2.4.0 + # via + # lexmachina-client (pyproject.toml) + # requests +yarl==1.20.0 + # via aiohttp From 5ffe34552563041386d2b2940846653daeb7d865 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Thu, 8 May 2025 15:19:57 -0600 Subject: [PATCH 17/21] [#15] Spell readthedocs right --- .readtheedocs.yaml => .readthedocs.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .readtheedocs.yaml => .readthedocs.yaml (100%) diff --git a/.readtheedocs.yaml b/.readthedocs.yaml similarity index 100% rename from .readtheedocs.yaml rename to .readthedocs.yaml From d54a24cddd6825e4a843fbd8c7f15273f746e197 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Thu, 8 May 2025 15:46:05 -0600 Subject: [PATCH 18/21] [#15] fix various typos --- docs/installation.rst | 2 +- docs/quickstart.rst | 6 ++---- docs/spreadsheet_example.rst | 16 
++++++++-------- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 7b10af7..43447c6 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -8,7 +8,7 @@ Install using pip within a virtual environment: $ pip install lexmachina-client -The examples in the quickstart assume a valid bearer token is set in the environment variable ``BEARER_TOKEN``. You can get a bearer token by using the ``/oauth2/token`` endpoint: +The examples in the :doc:`quickstart` and :doc:`spreadsheet_example` assume a valid bearer token is set in the environment variable ``BEARER_TOKEN``. You can get a bearer token by using the ``/oauth2/token`` endpoint: .. code-block:: bash diff --git a/docs/quickstart.rst b/docs/quickstart.rst index b1a1b22..0f00182 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -5,13 +5,13 @@ After following the :doc:`installation` instructions and setting your bearer tok For this example, we'll search for a case and get its details. -To get detailed information on an individual case, we'll need the Lex Machina id for that case. One way to get the case's id is to search for it by case number. +To get detailed information on an individual case, we'll need the Lex Machina id for that case. One way to get the Lex Machina case id is to search for it by case number. In the first code block, you will see the following steps: #. We configure the client. This requires you first get a bearer token and set its value to the environment variable ``BEARER_TOKEN``. #. Using the configured client, we create an object with access to the Federal District Case endpoints. -#. We do a case number search. For this example, we search for the Samsung Electronics v. Sandisk Corporation, case number 9:02-cv-00058-JH. The case number search ignores judge initials at the end of a case number so they've been left out in the search example below. But they could be left in and the results would be the same. 
We further refine the search by using the optional court filter to limit our search to cases in the U.S. District Court for the Eastern District of Texas. +#. We do a case number search. For this example, we search for the Samsung Electronics v. Sandisk Corporation case with case number 9:02-cv-00058-JH. The case number search ignores judge initials at the end of a case number so they've been left out in the search example below. But they could be left in and the results would be the same. We further refine the search by using the optional court filter to limit our search to cases in the U.S. District Court for the Eastern District of Texas. .. code-block:: python @@ -56,8 +56,6 @@ We can see from the output above the Lex Machina id for the case is 88. We will .. code-block:: python - fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client) - apple_v_sandisk_case = fed_dist_case_api_instance.get_district_case(88) diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index b4cd173..6d4b182 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -5,7 +5,7 @@ Adding search results to a spreadsheet In the :doc:`quickstart`, we looked for an individual case. In this example, we'll look at a group of cases to see if we can glean any interesting information about the group of cases as a whole. -For this example, we'll look at Antitrust cases terminated in 2024, do some light analysis, and add the cases to a spreadsheet. A jupyter notebook with this code can be found in `the examples folder `_ +For this example, we'll look at Antitrust cases terminated in 2024, do some light analysis, and add the cases to a spreadsheet. 
A jupyter notebook with this code can be found in `the examples folder (NOTE: not yet there) `_ In the :doc:`quickstart`, you saw how we created an API client object which we then used to create an object with access to the Federal District case API endpoints: @@ -82,7 +82,7 @@ Armed with case ids, we can then get case data for each of those cases. While we print(f'{len(case_data)} out of {len(case_ids)} processed') -We can now do some analysis. First we'll check which judges saw the most of these cases. +We can now do some analysis. First we'll check which judges saw the most cases among Antitrust cases that terminated in 2024. We'll also get info on how long these cases lasted. .. code-block:: python @@ -98,7 +98,7 @@ We'll also get info on how long these cases lasted. ) -The above shows that 378 judges saw these 671 cases. +If we check the length of keys in ``cases_by_judge``, we'll see that 378 judges saw these 671 cases. Next, we'll get some timing info: @@ -111,16 +111,16 @@ Next, we'll get some timing info: all_durations += [c['duration'].days for c in case_group] -If we import the ``statistics`` library, we can check out the mean and median values: +If we import the ``statistics`` library, we can check out the mean and median timing values for all Antitrust cases terminated in 2024 (timing is in days): .. code-block:: python import statistics - round(statistics.mean(sorted_all_durations)) + round(statistics.mean(all_durations)) 1084 - statistics.median(sorted_all_durations) + statistics.median(all_durations) 451 @@ -140,7 +140,7 @@ First let's sort judges by case counts: ) -Now let's check duration stats for the top five judges: +And then check duration stats for the top five judges: .. code-block:: python @@ -183,7 +183,7 @@ Now let's check duration stats for the top five judges: Now lets add the cases to a spreadsheet. 
-For this example, since we focused on judges until now, for the spreadsheet let's focus on something different and say we are most interested in analyzing which law firms and the roles they represented. +For this example, since we focused on judges until now, for the spreadsheet let's focus on law firms and the roles of the parties they represented. First, lets create the rows. We'll first determine which columns we want and then add that info for each row. From f676265a7933cf5ec3610a64c6c15794ca331e28 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Thu, 8 May 2025 15:49:23 -0600 Subject: [PATCH 19/21] [#15] Update documentation link --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d763cee..b959656 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ ] [project.urls] -Documentation = "https://github.com/LexMachinaInc/python-lexmachina-sync-api-client" +Documentation = "https://lexmachinaincpython-lexmachina-sync-api-client.readthedocs.io/en/latest/" Issues = "https://github.com/LexMachinaInc/python-lexmachina-sync-api-client/issues" Source = "https://github.com/LexMachinaInc/python-lexmachina-sync-api-client" Changelog = "https://github.com/LexMachinaInc/python-lexmachina-sync-api-client/blob/main/CHANGELOG.md" From db94438094154077206fb61e1d02f17fc4b8e2d8 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Fri, 9 May 2025 12:30:50 -0600 Subject: [PATCH 20/21] [#15] Fix typos, specify steps are done in python shell --- docs/quickstart.rst | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 0f00182..9ff4900 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -13,6 +13,8 @@ In the first code block, you will see the following steps: #. Using the configured client, we create an object with access to the Federal District Case endpoints. #. 
We do a case number search. For this example, we search for the Samsung Electronics v. Sandisk Corporation case with case number 9:02-cv-00058-JH. The case number search ignores judge initials at the end of a case number so they've been left out in the search example below. But they could be left in and the results would be the same. We further refine the search by using the optional court filter to limit our search to cases in the U.S. District Court for the Eastern District of Texas. +Now to get started. In a Jupyter notebook or an ``ipython`` shell, try out the following: + .. code-block:: python import lexmachina @@ -35,21 +37,23 @@ In the first code block, you will see the following steps: When we look at ``case_search_results``, this search conveniently returns just one result (if we had left out the court filter, it would have returned three results): .. code-block:: python - - [ - DistrictCaseNumberSearchResult( - total_count=1, - input_case_number='9:02-cv-00058', - input_court='txed', - matches=[ - DistrictCaseNumberReference( - url='https://api.lexmachina.com/district-cases/88', - district_case_id=88, - case_number='9:02-cv-00058', - court='U.S. District Court for the Eastern District of Texas', - title='Samsung Electronics v. Sandisk Corporation' - ) - ] + + case_search_results + [ + DistrictCaseNumberSearchResult( + total_count=1, + input_case_number='9:02-cv-00058', + input_court='txed', + matches=[ + DistrictCaseNumberReference( + url='https://api.lexmachina.com/district-cases/88', + district_case_id=88, + case_number='9:02-cv-00058', + court='U.S. District Court for the Eastern District of Texas', + title='Samsung Electronics v. Sandisk Corporation' + ) + ] + ] We can see from the output above the Lex Machina id for the case is 88. 
We will use that in the endpoint to get data on an individual case: From 8cdc176c916e29d902104189b32bb85523a696f4 Mon Sep 17 00:00:00 2001 From: Frances Wong Date: Fri, 9 May 2025 16:04:54 -0600 Subject: [PATCH 21/21] [#15] Tweaks to docs --- docs/installation.rst | 2 +- docs/quickstart.rst | 3 +- docs/spreadsheet_example.rst | 67 +++-- examples/spreadsheet_example.ipynb | 446 ++++++++++++----------------- 4 files changed, 239 insertions(+), 279 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 43447c6..b93d81d 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -8,7 +8,7 @@ Install using pip within a virtual environment: $ pip install lexmachina-client -The examples in the :doc:`quickstart` and :doc:`spreadsheet_example` assume a valid bearer token is set in the environment variable ``BEARER_TOKEN``. You can get a bearer token by using the ``/oauth2/token`` endpoint: +The examples in :doc:`quickstart` and :doc:`spreadsheet_example` assume a valid bearer token is set in the environment variable ``BEARER_TOKEN``. You can get a bearer token by using the ``/oauth2/token`` endpoint: .. code-block:: bash diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 9ff4900..74cf7ef 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -9,7 +9,8 @@ To get detailed information on an individual case, we'll need the Lex Machina id In the first code block, you will see the following steps: -#. We configure the client. This requires you first get a bearer token and set its value to the environment variable ``BEARER_TOKEN``. +#. We configure the client. In this example we set the value of the bearer token in an environment variable (mostly to prevent contributors to this documentation from accidentally exposing credentials), but you could also feed the value directly to ``access_token``. + #. Using the configured client, we create an object with access to the Federal District Case endpoints. #. We do a case number search. 
For this example, we search for the Samsung Electronics v. Sandisk Corporation case with case number 9:02-cv-00058-JH. The case number search ignores judge initials at the end of a case number so they've been left out in the search example below. But they could be left in and the results would be the same. We further refine the search by using the optional court filter to limit our search to cases in the U.S. District Court for the Eastern District of Texas. diff --git a/docs/spreadsheet_example.rst b/docs/spreadsheet_example.rst index 6d4b182..dbeceb8 100644 --- a/docs/spreadsheet_example.rst +++ b/docs/spreadsheet_example.rst @@ -1,12 +1,10 @@ -Adding search results to a spreadsheet -====================================== +Adding Case Data to a Spreadsheet +================================= -In the :doc:`quickstart`, we looked for an individual case. In this example, we'll look at a group of cases to see if we can glean any interesting information about the group of cases as a whole. - - -For this example, we'll look at Antitrust cases terminated in 2024, do some light analysis, and add the cases to a spreadsheet. A jupyter notebook with this code can be found in `the examples folder (NOTE: not yet there) `_ +In this example, we will look at Antitrust cases that terminated in 2024, do some light timing analysis, and add data on those cases to a spreadsheet. +We'll be using `openpyxl `_ to add data to an Excel file. You can install this package with the command ``pip install openpyxl``. In the :doc:`quickstart`, you saw how we created an API client object which we then used to create an object with access to the Federal District case API endpoints: @@ -65,7 +63,7 @@ Below, we iterate over pages and save the resulting case ids to a list. 
While th query['page'] = current_page + 1 else: - print(f'Antitrust cases terminated in 2024 has length {len(case_ids)}') + print(f'Number of Antitrust cases terminated in 2024: {len(case_ids)}') done_paging=True @@ -78,7 +76,7 @@ Armed with case ids, we can then get case data for each of those cases. While we for case_id in case_ids: case_data.append(fed_dist_case_api_instance.get_district_case(case_id)) - if len(case_data) % 50 == 0: + if len(case_data) % 20 == 0: print(f'{len(case_data)} out of {len(case_ids)} processed') @@ -98,10 +96,24 @@ We'll also get info on how long these cases lasted. ) -If we check the length of keys in ``cases_by_judge``, we'll see that 378 judges saw these 671 cases. +If we check the length of keys in ``cases_by_judge``, we'll see that 378 judges saw these 671 cases. We'll also check one of the judges to see how the data is represented. We see timing is represented in days. -Next, we'll get some timing info: +.. code-block:: python + len(cases_by_judge) + 378 + + list(cases_by_judge)[:5] + [('Mitchell S. Goldberg', 3193), + ('Edmond E-Min Chang', 3342), + ('Miriam Goldman Cedarbaum', 406), + ('Lorna Gail Schofield', 3451), + ('Joel A. Pisano', 2851)] + + cases_by_judge[('Lorna Gail Schofield', 3451)] + [{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}] + +Next, we'll get some timing info: .. code-block:: python @@ -139,7 +151,6 @@ First let's sort judges by case counts: case_count_by_judges, key=lambda x: x[-1], reverse=True ) - And then check duration stats for the top five judges: .. code-block:: python @@ -185,7 +196,35 @@ Now lets add the cases to a spreadsheet. For this example, since we focused on judges until now, for the spreadsheet let's focus on law firms and the roles of the parties they represented. -First, lets create the rows. We'll first determine which columns we want and then add that info for each row. + +First we'll check the structure of the law firm and party data provided: + + +.. 
code-block:: python + + case_data[0].law_firms[:3] + [LawFirm(name='Kessler Topaz Meltzer & Check', law_firm_id=27, client_party_ids=[257121, 52552843, 231694, 37904, 23356662, 20047290, 24917852, 37648157]), + LawFirm(name='Hagens Berman Sobol Shapiro', law_firm_id=30, client_party_ids=[231694]), + LawFirm(name='Berger Montague', law_firm_id=51, client_party_ids=[231694])] + + case_data[0].parties[:3] + [Party(name='Pennsylvania Employees Benefit Trust Fund', party_id=37904, role='Plaintiff'), + Party(name='Cephalon, Inc.', party_id=20036179, role='Defendant'), + Party(name='Vista Health Plan, Inc.', party_id=20047290, role='Plaintiff')] + + +To translate party ids provided in law firm information to party names, we will create, for each case, a dictionary mapping party ids to their ``Party`` objects (which carry the party's name and role): + +.. code-block:: python + + parties_by_id_by_case_id = {} + + for c in case_data: + parties_by_id_by_case_id[c.district_case_id] = {} + for p in c.parties: + parties_by_id_by_case_id[c.district_case_id][p.party_id] = p + +Now we are ready to create our spreadsheet rows! We'll first determine which columns we want and then add that info for each row. .. code-block:: python @@ -222,7 +261,6 @@ First, lets create the rows. We'll first determine which columns we want and the Now we'll spot check a few of them, including the header to make sure we added it. - .. code-block:: python len(rows) @@ -271,9 +309,6 @@ Now we'll spot check a few of them, including the header to make sure we added i Now let's add these rows to a spreadsheet. -For this example we'll be using `openpyxl `_, which you can install using ``pip install openpyxl``. - - .. 
code-block:: python from openpyxl import Workbook diff --git a/examples/spreadsheet_example.ipynb b/examples/spreadsheet_example.ipynb index a08e671..92323c1 100644 --- a/examples/spreadsheet_example.ipynb +++ b/examples/spreadsheet_example.ipynb @@ -2,17 +2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 78, + "execution_count": 1, "id": "ae993c1c-a333-4de3-b2a9-842b31f3a386", "metadata": {}, "outputs": [], "source": [ "import os\n", - "import lexmachina\n", "from collections import defaultdict\n", - "# pip install openpyxl\n", - "import openpyxl\n", - "import statistics" + "import statistics\n", + "\n", + "import lexmachina\n", + "import openpyxl # pip install openpyxl" ] }, { @@ -28,6 +28,26 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "097d4903-2407-4115-8773-d4c06e7878a1", + "metadata": {}, + "outputs": [], + "source": [ + "api_client = lexmachina.ApiClient(configuration)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "11d376bb-e4eb-4c18-8b30-2b2798fca3dc", + "metadata": {}, + "outputs": [], + "source": [ + "fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client)" + ] + }, { "cell_type": "code", "execution_count": 6, @@ -69,57 +89,88 @@ "current_page=5\n", "current_page=6\n", "current_page=7\n", - "Antitrust cases terminated in 2024 has length 671\n", - "50 out of 671 processed\n", + "Number of Antitrust cases terminated in 2024: 671\n" + ] + } + ], + "source": [ + "case_ids = []\n", + "done_paging = False\n", + "\n", + "while not done_paging:\n", + " query_response = fed_dist_case_api_instance.query_district_cases(query)\n", + "\n", + " if query_response.cases:\n", + " current_page = query['page']\n", + " print(f'{current_page=}')\n", + " result_case_ids = [caseref.district_case_id for caseref in query_response.cases]\n", + " case_ids += result_case_ids\n", + " query['page'] = current_page + 1\n", + "\n", + " else:\n", + " print(f'Number of Antitrust cases terminated in 2024: 
{len(case_ids)}')\n", + " done_paging=True" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ebb09c14-4741-4008-9535-452b5ff989b4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Now getting indvidual case info\n", + "20 out of 671 processed\n", + "40 out of 671 processed\n", + "60 out of 671 processed\n", + "80 out of 671 processed\n", "100 out of 671 processed\n", - "150 out of 671 processed\n", + "120 out of 671 processed\n", + "140 out of 671 processed\n", + "160 out of 671 processed\n", + "180 out of 671 processed\n", "200 out of 671 processed\n", - "250 out of 671 processed\n", + "220 out of 671 processed\n", + "240 out of 671 processed\n", + "260 out of 671 processed\n", + "280 out of 671 processed\n", "300 out of 671 processed\n", - "350 out of 671 processed\n", + "320 out of 671 processed\n", + "340 out of 671 processed\n", + "360 out of 671 processed\n", + "380 out of 671 processed\n", "400 out of 671 processed\n", - "450 out of 671 processed\n", + "420 out of 671 processed\n", + "440 out of 671 processed\n", + "460 out of 671 processed\n", + "480 out of 671 processed\n", "500 out of 671 processed\n", - "550 out of 671 processed\n", + "520 out of 671 processed\n", + "540 out of 671 processed\n", + "560 out of 671 processed\n", + "580 out of 671 processed\n", "600 out of 671 processed\n", - "650 out of 671 processed\n" + "620 out of 671 processed\n", + "640 out of 671 processed\n", + "660 out of 671 processed\n" ] } ], "source": [ - "with lexmachina.ApiClient(configuration) as api_client:\n", - " fed_dist_case_api_instance = lexmachina.FederalDistrictCasesApi(api_client)\n", - "\n", - " case_ids = []\n", - " done_paging = False\n", - "\n", - " while not done_paging:\n", - " query_response = fed_dist_case_api_instance.query_district_cases(query)\n", - " \n", - " if query_response.cases:\n", - " current_page = query['page']\n", - " print(f'{current_page=}')\n", - " result_case_ids = 
[caseref.district_case_id for caseref in query_response.cases]\n", - " case_ids += result_case_ids\n", - " query['page'] = current_page + 1\n", - " \n", - " else:\n", - " print(f'Antitrust cases terminated in 2024 has length {len(case_ids)}')\n", - " done_paging=True\n", + "case_data = []\n", "\n", - " case_data = []\n", - "\n", - " for case_id in case_ids:\n", - " case_data.append(fed_dist_case_api_instance.get_district_case(case_id))\n", - " if len(case_data) % 50 == 0:\n", - " print(f'{len(case_data)} out of {len(case_ids)} processed')\n", - "\n", - " " + "for case_id in case_ids:\n", + " case_data.append(fed_dist_case_api_instance.get_district_case(case_id))\n", + " if len(case_data) % 20 == 0:\n", + " print(f'{len(case_data)} out of {len(case_ids)} processed')" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "b069531f-0c0b-440a-bf9a-18d5bd02bf01", "metadata": {}, "outputs": [ @@ -129,7 +180,7 @@ "[FederalJudge(name='Mitchell S. Goldberg', federal_judge_id=3193)]" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -140,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 10, "id": "88204f40-0c2e-4b59-b140-407b6b0bd85e", "metadata": {}, "outputs": [], @@ -150,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 11, "id": "92093b5a-acd0-41c7-8b3c-841d94d873f5", "metadata": {}, "outputs": [], @@ -163,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 12, "id": "6e7f1382-5988-4cb7-ae8b-058f9109fa7c", "metadata": {}, "outputs": [ @@ -173,7 +224,7 @@ "378" ] }, - "execution_count": 64, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -184,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 13, "id": "afdb2296-fe46-4f84-8635-449ff78c0a01", "metadata": {}, "outputs": [ @@ -198,7 +249,7 @@ " ('Joel A. 
Pisano', 2851)]" ] }, - "execution_count": 65, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -209,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 14, "id": "ba5310fb-db7d-40e2-b682-54f9d8fe8eaf", "metadata": {}, "outputs": [ @@ -219,7 +270,7 @@ "[{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}]" ] }, - "execution_count": 66, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -230,280 +281,203 @@ }, { "cell_type": "code", - "execution_count": 67, - "id": "34c9b660-853d-4b26-92ff-b03849bfa8bd", + "execution_count": 15, + "id": "014909c0-f89c-45a8-b098-a797f4cfc51c", "metadata": {}, "outputs": [], "source": [ - "case_count_by_judges = [(judge_info, len(cases_by_judge[judge_info])) for judge_info in cases_by_judge]" + "all_durations = []" ] }, { "cell_type": "code", - "execution_count": 68, - "id": "b8c7b5f7-9798-4036-957b-22f40eed1d83", + "execution_count": 16, + "id": "7a21f080-4aba-445d-a25a-4dc09e483408", "metadata": {}, "outputs": [], "source": [ - "sorted_case_counts_by_judges = sorted(case_count_by_judges, key=lambda x: x[-1], reverse=True)" + "for case_group in cases_by_judge.values():\n", + " all_durations += [c['duration'].days for c in case_group]" ] }, { "cell_type": "code", - "execution_count": 69, - "id": "5a9b2351-d5a0-4cef-b744-7a986428d4e7", + "execution_count": 17, + "id": "53db69a3-8671-4982-8837-ffe726baa09e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[(('Edgardo Ramos', 3405), 37),\n", - " (('Waverly David Crenshaw Jr.', 3603), 34),\n", - " (('Sarah Elizabeth Pitlyk', 7465646), 30),\n", - " (('P. 
Kevin Castel', 3029), 23),\n", - " (('Sara Elizabeth Lioi', 3140), 23)]" + "[6752, 4015, 4011, 3980, 3952]" ] }, - "execution_count": 69, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sorted_case_counts_by_judges[:5]" + "all_durations[:5]" ] }, { "cell_type": "code", - "execution_count": 70, - "id": "514d178c-3110-4564-a006-d6a6dbb4ebc7", + "execution_count": 18, + "id": "9ac3916f-77b4-47f1-925c-db595de7b037", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[(('Steven Douglas Merryday', 1627), 1),\n", - " (('Mary Stenson Scriven', 3189), 1),\n", - " (('John George Koeltl', 1305), 1),\n", - " (('John Charles Hinderaker', 8938396), 1),\n", - " (('Yvonne Gonzalez Rogers', 3404), 1)]" + "1084" ] }, - "execution_count": 70, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sorted_case_counts_by_judges[-5:]" + "round(statistics.mean(all_durations))" ] }, { "cell_type": "code", - "execution_count": 71, - "id": "b5b75e1a-b339-4255-a0e0-cd299b8eb944", + "execution_count": 19, + "id": "585fe050-7a73-47f3-bf89-b7986ad9f460", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[[{'case_id': 97091, 'duration': datetime.timedelta(days=6752)},\n", - " {'case_id': 2000045171, 'duration': datetime.timedelta(days=4015)},\n", - " {'case_id': 2000045229, 'duration': datetime.timedelta(days=4011)},\n", - " {'case_id': 2000046633, 'duration': datetime.timedelta(days=3980)},\n", - " {'case_id': 2000047864, 'duration': datetime.timedelta(days=3952)},\n", - " {'case_id': 2000049181, 'duration': datetime.timedelta(days=3918)},\n", - " {'case_id': 2000049340, 'duration': datetime.timedelta(days=3912)},\n", - " {'case_id': 2000049341, 'duration': datetime.timedelta(days=3912)},\n", - " {'case_id': 2000049655, 'duration': datetime.timedelta(days=3905)},\n", - " {'case_id': 2000049658, 'duration': datetime.timedelta(days=3905)},\n", - " {'case_id': 2000049746, 'duration': 
datetime.timedelta(days=3903)},\n", - " {'case_id': 2000051026, 'duration': datetime.timedelta(days=3863)}],\n", - " [{'case_id': 48907, 'duration': datetime.timedelta(days=6117)},\n", - " {'case_id': 2007889413, 'duration': datetime.timedelta(days=935)},\n", - " {'case_id': 2034461979, 'duration': datetime.timedelta(days=3)}],\n", - " [{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}],\n", - " [{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}],\n", - " [{'case_id': 2000026715, 'duration': datetime.timedelta(days=4739)},\n", - " {'case_id': 2000028620, 'duration': datetime.timedelta(days=4676)},\n", - " {'case_id': 2000029363, 'duration': datetime.timedelta(days=4653)},\n", - " {'case_id': 2000034588, 'duration': datetime.timedelta(days=4500)},\n", - " {'case_id': 2000035078, 'duration': datetime.timedelta(days=4481)}]]" + "451" ] }, - "execution_count": 71, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "list( cases_by_judge.values())[:5]" + "statistics.median(all_durations)" ] }, { "cell_type": "code", - "execution_count": 84, - "id": "014909c0-f89c-45a8-b098-a797f4cfc51c", - "metadata": {}, - "outputs": [], - "source": [ - "all_durations = []" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "id": "7a21f080-4aba-445d-a25a-4dc09e483408", + "execution_count": 20, + "id": "34c9b660-853d-4b26-92ff-b03849bfa8bd", "metadata": {}, "outputs": [], "source": [ - "for case_group in cases_by_judge.values():\n", - " all_durations += [c['duration'].days for c in case_group]" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "id": "53db69a3-8671-4982-8837-ffe726baa09e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[6752, 4015, 4011, 3980, 3952]" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "all_durations[:5]" + "case_count_by_judges = [(judge_info, len(cases_by_judge[judge_info])) 
for judge_info in cases_by_judge]" ] }, { "cell_type": "code", - "execution_count": 87, - "id": "a54932ca-033e-4686-91fe-bcddd0e37582", + "execution_count": 21, + "id": "b8c7b5f7-9798-4036-957b-22f40eed1d83", "metadata": {}, "outputs": [], "source": [ - "sorted_all_durations = sorted(all_durations)" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "id": "3476b298-0fe6-4679-ba74-72a6e84c2a4c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0, 1, 1]" - ] - }, - "execution_count": 88, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sorted_all_durations[:3]" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "id": "f1d85467-43de-4c1e-9ede-81f4ab8fbea5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[4981, 6117, 6752]" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sorted_all_durations[-3:]" + "sorted_case_counts_by_judges = sorted(case_count_by_judges, key=lambda x: x[-1], reverse=True)" ] }, { "cell_type": "code", - "execution_count": 97, - "id": "9ac3916f-77b4-47f1-925c-db595de7b037", + "execution_count": 22, + "id": "5a9b2351-d5a0-4cef-b744-7a986428d4e7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1084" + "[(('Edgardo Ramos', 3405), 37),\n", + " (('Waverly David Crenshaw Jr.', 3603), 34),\n", + " (('Sarah Elizabeth Pitlyk', 7465646), 30),\n", + " (('P. 
Kevin Castel', 3029), 23),\n", + " (('Sara Elizabeth Lioi', 3140), 23)]" ] }, - "execution_count": 97, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "round(statistics.mean(sorted_all_durations))" + "sorted_case_counts_by_judges[:5]" ] }, { "cell_type": "code", - "execution_count": 105, - "id": "585fe050-7a73-47f3-bf89-b7986ad9f460", + "execution_count": 23, + "id": "514d178c-3110-4564-a006-d6a6dbb4ebc7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "451" + "[(('Steven Douglas Merryday', 1627), 1),\n", + " (('Mary Stenson Scriven', 3189), 1),\n", + " (('John George Koeltl', 1305), 1),\n", + " (('John Charles Hinderaker', 8938396), 1),\n", + " (('Yvonne Gonzalez Rogers', 3404), 1)]" ] }, - "execution_count": 105, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "statistics.median(sorted_all_durations)" + "sorted_case_counts_by_judges[-5:]" ] }, { "cell_type": "code", - "execution_count": 99, - "id": "655a73b1-ee3c-481a-9757-c29a66774d09", + "execution_count": 24, + "id": "b5b75e1a-b339-4255-a0e0-cd299b8eb944", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[(('Edgardo Ramos', 3405), 37),\n", - " (('Waverly David Crenshaw Jr.', 3603), 34),\n", - " (('Sarah Elizabeth Pitlyk', 7465646), 30),\n", - " (('P. 
Kevin Castel', 3029), 23),\n", - " (('Sara Elizabeth Lioi', 3140), 23)]" + "[[{'case_id': 97091, 'duration': datetime.timedelta(days=6752)},\n", + " {'case_id': 2000045171, 'duration': datetime.timedelta(days=4015)},\n", + " {'case_id': 2000045229, 'duration': datetime.timedelta(days=4011)},\n", + " {'case_id': 2000046633, 'duration': datetime.timedelta(days=3980)},\n", + " {'case_id': 2000047864, 'duration': datetime.timedelta(days=3952)},\n", + " {'case_id': 2000049181, 'duration': datetime.timedelta(days=3918)},\n", + " {'case_id': 2000049340, 'duration': datetime.timedelta(days=3912)},\n", + " {'case_id': 2000049341, 'duration': datetime.timedelta(days=3912)},\n", + " {'case_id': 2000049655, 'duration': datetime.timedelta(days=3905)},\n", + " {'case_id': 2000049658, 'duration': datetime.timedelta(days=3905)},\n", + " {'case_id': 2000049746, 'duration': datetime.timedelta(days=3903)},\n", + " {'case_id': 2000051026, 'duration': datetime.timedelta(days=3863)}],\n", + " [{'case_id': 48907, 'duration': datetime.timedelta(days=6117)},\n", + " {'case_id': 2007889413, 'duration': datetime.timedelta(days=935)},\n", + " {'case_id': 2034461979, 'duration': datetime.timedelta(days=3)}],\n", + " [{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}],\n", + " [{'case_id': 2000009555, 'duration': datetime.timedelta(days=4981)}],\n", + " [{'case_id': 2000026715, 'duration': datetime.timedelta(days=4739)},\n", + " {'case_id': 2000028620, 'duration': datetime.timedelta(days=4676)},\n", + " {'case_id': 2000029363, 'duration': datetime.timedelta(days=4653)},\n", + " {'case_id': 2000034588, 'duration': datetime.timedelta(days=4500)},\n", + " {'case_id': 2000035078, 'duration': datetime.timedelta(days=4481)}]]" ] }, - "execution_count": 99, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "sorted_case_counts_by_judges[:5]" + "list(cases_by_judge.values())[:5]" ] }, { "cell_type": "code", - "execution_count": 104, + 
"execution_count": 26, "id": "f6073a89-cc82-4400-bd2d-bde0894e5f60", "metadata": {}, "outputs": [ @@ -552,7 +526,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 28, "id": "c9de2e16-b774-41d1-91e0-270b592c385f", "metadata": {}, "outputs": [ @@ -561,94 +535,44 @@ "text/plain": [ "[LawFirm(name='Kessler Topaz Meltzer & Check', law_firm_id=27, client_party_ids=[257121, 52552843, 231694, 37904, 23356662, 20047290, 24917852, 37648157]),\n", " LawFirm(name='Hagens Berman Sobol Shapiro', law_firm_id=30, client_party_ids=[231694]),\n", - " LawFirm(name='Berger Montague', law_firm_id=51, client_party_ids=[231694]),\n", - " LawFirm(name='Faruqi & Faruqi', law_firm_id=54, client_party_ids=[231694]),\n", - " LawFirm(name='Reed Smith', law_firm_id=433, client_party_ids=[27, 20020989]),\n", - " LawFirm(name='Department of Justice', law_firm_id=604, client_party_ids=[20020989]),\n", - " LawFirm(name='WilmerHale', law_firm_id=853, client_party_ids=[20036179]),\n", - " LawFirm(name='Stevens & Lee', law_firm_id=1280, client_party_ids=[2993, 1087]),\n", - " LawFirm(name='Venable', law_firm_id=1909, client_party_ids=[20030256, 2993, 20036179, 20027124, 1855, 27, 20020989, 1087]),\n", - " LawFirm(name='State of Pennsylvania', law_firm_id=2338, client_party_ids=[27, 20020989]),\n", - " LawFirm(name='Rawlings & Associates', law_firm_id=3008, client_party_ids=[231694]),\n", - " LawFirm(name='Mintz, Levin, Cohn, Ferris, Glovsky and Popeo', law_firm_id=4142, client_party_ids=[2993, 1087]),\n", - " LawFirm(name='Cravath, Swaine & Moore', law_firm_id=8914, client_party_ids=[27, 20020989, 1855]),\n", - " LawFirm(name='Volpe & Koenig', law_firm_id=18520, client_party_ids=[20030256, 20027124]),\n", - " LawFirm(name='Kirkland & Ellis', law_firm_id=20983, client_party_ids=[20030256, 2993, 20036179, 20027124, 1855, 27, 20020989, 1087]),\n", - " LawFirm(name='Eckert Seamans Cherin & Mellott', law_firm_id=27984, client_party_ids=[20036179]),\n", - " LawFirm(name='Willkie 
Farr & Gallagher', law_firm_id=31981, client_party_ids=[20030256, 2993, 1087, 20027124, 20020989, 1855]),\n", - " LawFirm(name='Robinson & Cole', law_firm_id=33881, client_party_ids=[27, 20020989]),\n", - " LawFirm(name='Finkelstein Thompson', law_firm_id=35890, client_party_ids=[37904, 23356662, 20047290, 24917852, 37648157]),\n", - " LawFirm(name='Lewis Brisbois Bisgaard & Smith', law_firm_id=54662, client_party_ids=[27, 20020989]),\n", - " LawFirm(name=\"Conrad, O'Brien, Gellman & Rohn\", law_firm_id=99445, client_party_ids=[20036179]),\n", - " LawFirm(name='Klafter Olsen & Lesser', law_firm_id=230148, client_party_ids=[20047290]),\n", - " LawFirm(name='Criden & Love', law_firm_id=303343, client_party_ids=[52552843]),\n", - " LawFirm(name='Hangley Aronchick Segal Pudlin & Schiller', law_firm_id=449598, client_party_ids=[9750]),\n", - " LawFirm(name='Armstrong Teasdale', law_firm_id=479700, client_party_ids=[1855]),\n", - " LawFirm(name='City of Philadelphia, Pennsylvania', law_firm_id=797446, client_party_ids=[27, 20020989]),\n", - " LawFirm(name='Hanzman Criden Love', law_firm_id=1241171, client_party_ids=[37904, 23356662, 20047290, 24917852, 37648157]),\n", - " LawFirm(name='Finkelstein & Krinsk', law_firm_id=1356290, client_party_ids=[34912145]),\n", - " LawFirm(name='Akin Gump Strauss Hauer & Feld', law_firm_id=1948224, client_party_ids=[27, 20020989, 1855]),\n", - " LawFirm(name='Hilliard & Shadowen', law_firm_id=2605659, client_party_ids=[9750]),\n", - " LawFirm(name='Roddy Klein & Ryan', law_firm_id=12953381, client_party_ids=[20036179]),\n", - " LawFirm(name='Kirschner & Gartrell', law_firm_id=14093126, client_party_ids=[20047290]),\n", - " LawFirm(name='Spector Roseman & Kodroff', law_firm_id=42582324, client_party_ids=[257121, 52552843, 37904, 23356662, 20047290, 24917852, 37648157]),\n", - " LawFirm(name='MONTGOMERY McCRACKEN WALKER & RHOADS', law_firm_id=45200100, client_party_ids=[1855]),\n", - " LawFirm(name='Harkins Cunningham', 
law_firm_id=62651519, client_party_ids=[20030256, 20027124]),\n", - " LawFirm(name='White and Williams', law_firm_id=76758445, client_party_ids=[20030256, 2993, 20036179, 20027124, 1855, 20020989, 1087]),\n", - " LawFirm(name='Law Offices of Robert W. Sink', law_firm_id=93515719, client_party_ids=[37904, 23356662, 20047290, 24917852, 37648157]),\n", - " LawFirm(name='Fox Rothschild', law_firm_id=226013496, client_party_ids=[20036179])]" + " LawFirm(name='Berger Montague', law_firm_id=51, client_party_ids=[231694])]" ] }, - "execution_count": 108, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "case_data[0].law_firms" + "case_data[0].law_firms[:3]" ] }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 29, "id": "825628d6-1cdf-4ff4-b86b-f73bbd41b5a6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Party(name='Debra Langan', party_id=24917852, role='Plaintiff'),\n", - " Party(name='Pennsylvania Turnpike Commission', party_id=23356662, role='Plaintiff'),\n", - " Party(name='SHIRLEY PANEBIANO', party_id=257121, role='Plaintiff'),\n", - " Party(name='AvMed, Inc.', party_id=231694, role='Plaintiff'),\n", - " Party(name='Jeffrey R. 
Krinsk', party_id=34912145, role='Plaintiff'),\n", - " Party(name='District Council 37 Health and Security Plan', party_id=37648157, role='Plaintiff'),\n", - " Party(name='Eckerd Corporation', party_id=9750, role='Defendant'),\n", - " Party(name='Teva Pharmaceutical Industries Ltd.', party_id=2993, role='Defendant'),\n", - " Party(name='Teva Pharmaceuticals USA, Inc.', party_id=1087, role='Defendant'),\n", - " Party(name='Mylan Pharmaceuticals, Inc.', party_id=27, role='Defendant'),\n", - " Party(name='Barr Laboratories, Inc.', party_id=1855, role='Defendant'),\n", - " Party(name='End Payor Class Plaintiffs', party_id=52552843, role='Plaintiff'),\n", - " Party(name='Ranbaxy Laboratories Limited', party_id=20027124, role='Defendant'),\n", - " Party(name='Vista Health Plan, Inc.', party_id=20047290, role='Plaintiff'),\n", - " Party(name='Ranbaxy Pharmaceuticals Inc.', party_id=20030256, role='Defendant'),\n", + "[Party(name='Pennsylvania Employees Benefit Trust Fund', party_id=37904, role='Plaintiff'),\n", " Party(name='Cephalon, Inc.', party_id=20036179, role='Defendant'),\n", - " Party(name='Mylan Laboratories, Inc.', party_id=20020989, role='Defendant'),\n", - " Party(name='Pennsylvania Employees Benefit Trust Fund', party_id=37904, role='Plaintiff')]" + " Party(name='Vista Health Plan, Inc.', party_id=20047290, role='Plaintiff')]" ] }, - "execution_count": 109, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "case_data[0].parties" + "case_data[0].parties[:3]" ] }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 30, "id": "8ac40da8-cd31-49a3-bdf0-16cbf08a77bb", "metadata": {}, "outputs": [], @@ -658,7 +582,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 31, "id": "c4a47c85-6044-45c8-9e79-e3d21f62402d", "metadata": {}, "outputs": [],