import os
import re

import requests
def find_url(path):
    """Collect every ``https:...jpg`` image URL found in files under *path*.

    Walks *path* recursively, reads each file (skipping macOS ``.DS_Store``
    metadata files) and extracts every non-greedy match of ``https:...jpg``
    from its text.

    Args:
        path: Root directory to scan.

    Returns:
        List of matched image URLs in discovery order (duplicates kept).
    """
    # Compile once, outside the walk; raw string avoids escape surprises.
    # Non-greedy ".+?" keeps several URLs on one line as separate matches.
    img_pattern = re.compile(r"https:.+?jpg")
    old_url = []
    for root, _, files in os.walk(path):
        for file in files:
            if file == ".DS_Store":
                continue
            with open(os.path.join(root, file), "r") as f:
                content = f.read()
            # findall on a group-free pattern yields the full matches
            # directly — no need to re-assemble "https:" + body + "jpg".
            old_url.extend(img_pattern.findall(content))
    return old_url
def download_img(urls, out_dir="/Users/suixin/temp/imgs/"):
    """Download each image URL in *urls* into *out_dir*.

    The local file name is the last 36 characters of the URL (presumably a
    32-char hash plus ".jpg" in this blog's naming scheme — TODO confirm).

    Args:
        urls: Iterable of image URLs to fetch.
        out_dir: Destination directory, created if missing. Defaults to the
            original hard-coded path for backward compatibility.
    """
    os.makedirs(out_dir, exist_ok=True)
    for url in urls:
        print("Downloading: {}".format(url))
        # Timeout so a dead server cannot hang the script forever;
        # raise_for_status turns HTTP errors into exceptions instead of
        # silently writing an error page to disk as an "image".
        img = requests.get(url, timeout=30)
        img.raise_for_status()
        with open(os.path.join(out_dir, url[-36:]), "wb") as f:
            f.write(img.content)
def replace_urls(path, urls_old,
                 out_dir="/Users/suixin/temp/posts_new/",
                 new_prefix="https://raw.githubusercontent.com/ShaneTian/blogimages/master/images/"):
    """Rewrite old image URLs to their GitHub-raw equivalents in copies of the posts.

    For every file under *path* (skipping ``.DS_Store``), each URL from
    *urls_old* found in the text is replaced by *new_prefix* plus the URL's
    last 36 characters, and the (possibly unchanged) file is written to
    *out_dir* under the same file name.

    Args:
        path: Directory of source files to scan.
        urls_old: List of old image URLs to replace.
        out_dir: Output directory; defaults to the original hard-coded path.
        new_prefix: New URL prefix; defaults to the original hard-coded value.
    """
    urls_new = [new_prefix + u[-36:] for u in urls_old]
    os.makedirs(out_dir, exist_ok=True)
    for root, _, files in os.walk(path):
        for file in files:
            if file == ".DS_Store":
                continue
            with open(os.path.join(root, file), "r") as f:
                content = f.read()
            for url_old, url_new in zip(urls_old, urls_new):
                # Plain substring test. The original called
                # re.search(pattern=url_old, ...), which treated the URL as
                # a regex: "." matched any character and metacharacters
                # like "+" or "?" would have broken or mis-matched.
                if url_old in content:
                    print("Replace {} to {}\n".format(url_old, url_new))
                    content = content.replace(url_old, url_new)
            with open(os.path.join(out_dir, file), "w") as f_write:
                f_write.write(content)
if __name__ == "__main__":
    # Scan the live blog posts for old image URLs, then rewrite those URLs
    # inside the working copy of the posts.
    replace_urls("/Users/suixin/temp/_posts",
                 find_url("/Users/suixin/blog/source/_posts"))