file_helpers.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. import os
  2. import shutil
  3. import tempfile
  4. from atomicwrites import AtomicWriter
  5. def mkdirs_exists_ok(path):
  6. if path.startswith('http://') or path.startswith('https://'):
  7. raise ValueError('URL path')
  8. try:
  9. os.makedirs(path)
  10. except OSError:
  11. if not os.path.isdir(path):
  12. raise
  13. def rm_not_exists_ok(path):
  14. try:
  15. os.remove(path)
  16. except OSError:
  17. if os.path.exists(path):
  18. raise
  19. def rm_tree_or_link(path):
  20. if os.path.islink(path):
  21. os.unlink(path)
  22. elif os.path.isdir(path):
  23. shutil.rmtree(path)
  24. def get_tmpdir_on_same_filesystem(path):
  25. normpath = os.path.normpath(path)
  26. parts = normpath.split("/")
  27. if len(parts) > 1 and parts[1] == "scratch":
  28. return "/scratch/tmp"
  29. elif len(parts) > 2 and parts[2] == "runner":
  30. return f"/{parts[1]}/runner/tmp"
  31. return "/tmp"
  32. class NamedTemporaryDir():
  33. def __init__(self, temp_dir=None):
  34. self._path = tempfile.mkdtemp(dir=temp_dir)
  35. @property
  36. def name(self):
  37. return self._path
  38. def close(self):
  39. shutil.rmtree(self._path)
  40. def __enter__(self):
  41. return self
  42. def __exit__(self, exc_type, exc_value, traceback):
  43. self.close()
  44. class CallbackReader:
  45. """Wraps a file, but overrides the read method to also
  46. call a callback function with the number of bytes read so far."""
  47. def __init__(self, f, callback, *args):
  48. self.f = f
  49. self.callback = callback
  50. self.cb_args = args
  51. self.total_read = 0
  52. def __getattr__(self, attr):
  53. return getattr(self.f, attr)
  54. def read(self, *args, **kwargs):
  55. chunk = self.f.read(*args, **kwargs)
  56. self.total_read += len(chunk)
  57. self.callback(*self.cb_args, self.total_read)
  58. return chunk
  59. def _get_fileobject_func(writer, temp_dir):
  60. def _get_fileobject():
  61. return writer.get_fileobject(dir=temp_dir)
  62. return _get_fileobject
  63. def atomic_write_on_fs_tmp(path, **kwargs):
  64. """Creates an atomic writer using a temporary file in a temporary directory
  65. on the same filesystem as path.
  66. """
  67. # TODO(mgraczyk): This use of AtomicWriter relies on implementation details to set the temp
  68. # directory.
  69. writer = AtomicWriter(path, **kwargs)
  70. return writer._open(_get_fileobject_func(writer, get_tmpdir_on_same_filesystem(path)))
  71. def atomic_write_in_dir(path, **kwargs):
  72. """Creates an atomic writer using a temporary file in the same directory
  73. as the destination file.
  74. """
  75. writer = AtomicWriter(path, **kwargs)
  76. return writer._open(_get_fileobject_func(writer, os.path.dirname(path)))