123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
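# -*- coding: utf-8 -*-
"""
@author:XuMing(xuming624@qq.com)
@description: String and list helpers: single-edit candidate generation,
    consecutive-run grouping, index lookup, and edit-distance measures.
"""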
def edit_distance_word(word, char_set):
    """
    Build the set of strings reachable from ``word`` by one adjacent
    transposition or one single-character substitution.

    Substituting a character with itself (or swapping two equal neighbours)
    reproduces ``word``, so ``word`` may be part of the result.

    :param word: the source string
    :param char_set: iterable of replacement characters tried at every position
    :return: set of candidate strings
    """
    candidates = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        # Transposition: swap the two characters right after the cut.
        if len(tail) > 1:
            candidates.add(head + tail[1] + tail[0] + tail[2:])
        # Substitution: replace the character right after the cut.
        if tail:
            for ch in char_set:
                candidates.add(head + ch + tail[1:])
    return candidates
def get_sub_array(nums):
    """
    Group a sequence into runs of consecutive values.

    A run of two or more values (each one greater than the previous by 1) is
    reported as ``[first, last + 1]`` (a half-open range); a value that is not
    part of a longer run is reported as ``[value]``.

    Example:
        [0, 1, 2, 5, 7, 8] => [[0, 3], [5], [7, 9]]

    :param nums: sorted(list)
    :return: list of one- or two-element lists, in input order
    """
    groups = []
    last_index = len(nums) - 1
    start = 0
    while start <= last_index:
        first = nums[start]
        end = start
        current = first
        # Extend the run while the next element is exactly one larger.
        while end < last_index and nums[end + 1] == current + 1:
            end += 1
            current += 1
        if end > start:
            groups.append([first, nums[end] + 1])
        else:
            groups.append([first])
        # Resume scanning right after the run that was just consumed.
        start = end + 1
    return groups
def find_all_idx2(lst, item):
    """
    Collect every index at which ``item`` equals an element of ``lst``.

    The comparison is element by element, so for a string only
    single-character items can match.

    :param lst: list or string
    :param item: the element to look for
    :return: list of matching indices, in ascending order
    """
    return [pos for pos, elem in enumerate(lst) if item == elem]
def find_all_idx(lst, item):
    """
    Return every index at which ``item`` occurs in ``lst``.

    For a list this is each position whose element equals ``item``. For a
    string ``item`` may be a substring, and the start index of every
    occurrence is returned, including occurrences that overlap each other.

    :param lst: list or string (anything exposing ``index(item, start)``)
    :param item: the element (or substring) to look for
    :return: list of indices, in ascending order
    """
    ids = []
    start = 0
    while True:
        try:
            pos = lst.index(item, start)
        except ValueError:
            # ``index`` signals "no further occurrence" by raising.
            return ids
        ids.append(pos)
        # Advance by one so overlapping occurrences are also found.
        start = pos + 1
def edit_distance_dp(str1: str, str2: str) -> int:
    """
    Compute the edit distance between two strings with dynamic programming.

    Insertions, deletions and substitutions each cost 1.

    Args:
        str1: first string
        str2: second string

    Returns:
        int: the edit distance
    """
    # An empty side means the distance is the length of the other side.
    if not str1:
        return len(str2)
    if not str2:
        return len(str1)

    # previous[j] holds the distance between the processed prefix of str1
    # and the first j characters of str2.
    previous = list(range(len(str2) + 1))
    for row, ch1 in enumerate(str1, start=1):
        current = [row]
        for col, ch2 in enumerate(str2, start=1):
            if ch1 == ch2:
                cost = previous[col - 1]
            else:
                cost = min(previous[col], current[col - 1], previous[col - 1]) + 1
            current.append(cost)
        previous = current
    return previous[-1]
def edit_distance(str1, str2):
    """
    Return a normalised dissimilarity score between two strings.

    The preferred backend is the optional ``Levenshtein`` package: the
    Levenshtein distance divided by the length of the longer input. If that
    path fails for any reason (package not installed, both inputs empty so
    the division fails, ...) the score falls back to
    ``1.0 - difflib.SequenceMatcher(...).ratio()`` with spaces treated as
    junk. The two backends use different formulas, so their scores are not
    guaranteed to agree for every input pair.

    :param str1: first string
    :param str2: second string
    :return: float score; 0.0 for identical inputs
    """
    try:
        # very fast
        # http://stackoverflow.com/questions/14260126/how-python-levenshtein-ratio-is-computed
        import Levenshtein
        d = Levenshtein.distance(str1, str2) / float(max(len(str1), len(str2)))
    except Exception:
        # Best-effort fallback. ``Exception`` (not a bare ``except``) keeps
        # KeyboardInterrupt / SystemExit propagating to the caller.
        # https://docs.python.org/2/library/difflib.html
        import difflib
        d = 1.0 - difflib.SequenceMatcher(lambda x: x == " ", str1, str2).ratio()
    return d
if __name__ == "__main__":
    # Demo 1: positions of an element inside a list.
    numbers = [1, 2, 3, 4, 2, 3, 4]
    print(find_all_idx(numbers, 2))
    # Demo 2: start positions of a substring inside a string.
    sentence = '我爱中国,我是中国人'
    print(find_all_idx(sentence, '中国'))
|