comma_car_segments.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. import os
  2. import requests
  3. # Forks with additional car support can fork the commaCarSegments repo on huggingface or host the LFS files themselves
  4. COMMA_CAR_SEGMENTS_REPO = os.environ.get("COMMA_CAR_SEGMENTS_REPO", "https://huggingface.co/datasets/commaai/commaCarSegments")
  5. COMMA_CAR_SEGMENTS_BRANCH = os.environ.get("COMMA_CAR_SEGMENTS_BRANCH", "main")
  6. COMMA_CAR_SEGMENTS_LFS_INSTANCE = os.environ.get("COMMA_CAR_SEGMENTS_LFS_INSTANCE", COMMA_CAR_SEGMENTS_REPO)
  7. def get_comma_car_segments_database():
  8. return requests.get(get_repo_raw_url("database.json")).json()
  9. # Helpers related to interfacing with the commaCarSegments repository, which contains a collection of public segments for users to perform validation on.
  10. def parse_lfs_pointer(text):
  11. header, lfs_version = text.splitlines()[0].split(" ")
  12. assert header == "version"
  13. assert lfs_version == "https://git-lfs.github.com/spec/v1"
  14. header, oid_raw = text.splitlines()[1].split(" ")
  15. assert header == "oid"
  16. header, oid = oid_raw.split(":")
  17. assert header == "sha256"
  18. header, size = text.splitlines()[2].split(" ")
  19. assert header == "size"
  20. return oid, size
  21. def get_lfs_file_url(oid, size):
  22. data = {
  23. "operation": "download",
  24. "transfers": [ "basic" ],
  25. "objects": [
  26. {
  27. "oid": oid,
  28. "size": int(size)
  29. }
  30. ],
  31. "hash_algo": "sha256"
  32. }
  33. headers = {
  34. "Accept": "application/vnd.git-lfs+json",
  35. "Content-Type": "application/vnd.git-lfs+json"
  36. }
  37. response = requests.post(f"{COMMA_CAR_SEGMENTS_LFS_INSTANCE}.git/info/lfs/objects/batch", json=data, headers=headers)
  38. assert response.ok
  39. obj = response.json()["objects"][0]
  40. assert "error" not in obj, obj
  41. return obj["actions"]["download"]["href"]
  42. def get_repo_raw_url(path):
  43. if "huggingface" in COMMA_CAR_SEGMENTS_REPO:
  44. return f"{COMMA_CAR_SEGMENTS_REPO}/raw/{COMMA_CAR_SEGMENTS_BRANCH}/{path}"
  45. def get_repo_url(path):
  46. # Automatically switch to LFS if we are requesting a file that is stored in LFS
  47. response = requests.head(get_repo_raw_url(path))
  48. if "text/plain" in response.headers.get("content-type"):
  49. # This is an LFS pointer, so download the raw data from lfs
  50. response = requests.get(get_repo_raw_url(path))
  51. assert response.status_code == 200
  52. oid, size = parse_lfs_pointer(response.text)
  53. return get_lfs_file_url(oid, size)
  54. else:
  55. # File has not been uploaded to LFS yet
  56. # (either we are on a fork where the data hasn't been pushed to LFS yet, or the CI job to push hasn't finished)
  57. return get_repo_raw_url(path)
  58. def get_url(route, segment, file="rlog.bz2"):
  59. return get_repo_url(f"segments/{route.replace('|', '/')}/{segment}/{file}")