log2json.sh 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. #!/usr/bin/env bash
  2. ############################################################################
  3. #
  4. # Licensed to the Apache Software Foundation (ASF) under one or more
  5. # contributor license agreements. See the NOTICE file distributed with
  6. # this work for additional information regarding copyright ownership. The
  7. # ASF licenses this file to you under the Apache License, Version 2.0 (the
  8. # "License"); you may not use this file except in compliance with the
  9. # License. You may obtain a copy of the License at
  10. #
  11. # http://www.apache.org/licenses/LICENSE-2.0
  12. #
  13. # Unless required by applicable law or agreed to in writing, software
  14. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  16. # License for the specific language governing permissions and limitations
  17. # under the License.
  18. #
  19. ############################################################################
  # Placeholder standing in for the JSON quote character (") inside the
  # format string handed to git. git inserts commit text verbatim, so quotes
  # coming from commit messages cannot be told apart from structural ones
  # until getlog has escaped them; only afterwards is the placeholder turned
  # into a real quote.
  Q='^@^'

  #######################################
  # Print the history of one path as a JSON array, one object per commit.
  # Globals:
  #   Q (read) - quote placeholder; the final sed expression hard-codes the
  #              same text and must be kept in sync with it
  # Arguments:
  #   $1 - path whose history is listed
  # Outputs:
  #   JSON array on stdout, one entry per line, no trailing newline
  # Returns:
  #   exit status of the filter pipeline
  #######################################
  getlog() {
    # "json-key:git-placeholder" pairs, in output order.
    local -a fields=(
      'commit:%H'
      'author:%aN'
      'author-email:%aE'
      'date:%ad'
      'committer:%cn'
      'committer-email:%ce'
      'message:%s'
      'body:%b'
      'signed:%G?'
      'signer:%GS'
      'key:%GK'
    )
    local format='{ ' sep='' field

    # Build "{ Qkey1Q: Qvalue1Q, Qkey2Q: Qvalue2Q, ... }," with Q as the quote.
    for field in "${fields[@]}"; do
      format+="${sep}${Q}${field%%:*}${Q}: ${Q}${field#*:}${Q}"
      sep=', '
    done
    format+=' },'

    {
      printf '['
      git --no-pager log --follow --simplify-merges \
        --pretty=format:"$format" -- "$1"
      printf ']'
    } |
      # Escape what a JSON string cannot hold raw. Backslashes go first so
      # the backslashes added for quotes and tabs are not doubled again.
      sed -E -e 's|\\|\\\\|g' -e 's|"|\\"|g' -e 's|\t|\\t|g' |
      # Newlines, carriage returns and any other remaining control character
      # become spaces: JSON strings must not contain them unescaped.
      tr '\000-\037' '[ *]' |
      # Put each entry on its own line for readability, drop the comma left
      # dangling after the last entry, then turn the placeholder into ".
      sed -E -e 's|\}, \{|},\n{|g' -e 's|\},\]|}]|g' -e 's|\^@\^|"|g'
  }
  #######################################
  # Print, as a JSON array, the blob id and path recorded by each raw diff
  # line in the history of one path.
  # Arguments:
  #   $1 - path whose history is listed
  # Outputs:
  #   JSON array of { "blob": ..., "path": ... } objects on stdout, one per
  #   line, no trailing newline
  # Returns:
  #   exit status of the closing printf
  #######################################
  getblobs() {
    printf '['
    git --no-pager log --pretty='' --no-abbrev --raw --follow --simplify-merges -- "$1" |
      awk -F '\t' '
        # A raw line is ":<mode> <mode> <old-id> <new-id> <status>" followed
        # by a tab and the path. Splitting on the tab keeps paths that
        # contain spaces in one piece.
        # NOTE(review): rename/copy lines presumably carry two tab-separated
        # paths; this keeps the first one, as the previous cut-based code
        # did - confirm that is the wanted path.
        /^:/ && NF >= 2 {
          split($1, meta, " ")
          path = $2
          gsub(/\\/, "&&", path)    # double every backslash
          gsub(/"/, "\\\"", path)   # put a backslash before every quote
          # Separator goes before every entry but the first, so no comma is
          # left dangling after the last one.
          if (count++) printf ",\n"
          printf "{ \"blob\": \"%s\", \"path\": \"%s\" }", meta[4], path
        }
        END { if (count) printf "\n" }
      '
    printf ']'
  }
  # Driver: merge the per-commit log entries and the per-commit blob entries
  # for the file given as $1 into one JSON array printed on stdout.
  if [[ -z "${1:-}" ]]; then
    # Diagnostics go to stderr; stdout is reserved for the JSON result.
    echo "usage: ${0##*/} <file>" >&2
    exit 1
  fi

  out1=$(mktemp) || exit 1
  out2=$(mktemp) || { rm -f -- "$out1"; exit 1; }
  # Remove the scratch files on every exit path. Because the cleanup lives
  # in the trap, the script's exit status is that of the last real command.
  trap 'rm -f -- "$out1" "$out2"' EXIT

  getlog "$1" > "$out1"
  getblobs "$1" > "$out2"

  if [[ "$(jq '. | length' "$out1")" != "$(jq '. | length' "$out2")" ]]; then
    # TODO: handle this case. We get more than one blob when the file is
    # moved to/from a submodule, which makes parsing difficult. Also, the
    # blob we get while the file lives in the submodule is not usable since
    # it possibly comes from the pointed-to repo.
    trap - EXIT # keep both files: the messages below point the user at them
    echo "Log and blob list differ in size, probably a file which lived in a submodule" >&2
    echo "Log output is at: $out1" >&2
    echo "Blob output is at: $out2" >&2
    exit 1
  fi

  # Pair the n-th log entry with the n-th blob entry and merge each pair
  # into a single object.
  jq -s 'transpose | map(.[0] + .[1])' "$out1" "$out2"