Updated: 05 mrt 2026
converts ppt to pdf
import os
import argparse
# Fallback values for the command-line options defined in main().
root_dir = "./"  # start scanning in the current working directory
greater_than_size = 100 * 1024 ** 2  # size threshold in bytes (100 MB)
def find_large_files(root_dir, greater_than_size):
    """Recursively collect files under *root_dir* that exceed a size threshold.

    Args:
        root_dir: Directory at which the recursive walk starts.
        greater_than_size: Threshold in bytes; only files strictly larger
            than this value are reported.

    Returns:
        A list of ``(path, size)`` tuples, where ``path`` is relative to
        *root_dir* and ``size`` is the file size in bytes. Files whose size
        cannot be read (``OSError``) are left out.
    """
    matches = []
    for current_dir, _subdirs, names in os.walk(root_dir):
        for name in names:
            candidate = os.path.join(current_dir, name)
            try:
                n_bytes = os.path.getsize(candidate)
                oversized = n_bytes > greater_than_size
                rel = os.path.relpath(candidate, root_dir) if oversized else None
            except OSError:
                # Inaccessible entries are skipped on purpose (best effort).
                continue
            if oversized:
                matches.append((rel, n_bytes))
    return matches
def main():
    """Command-line entry point: list files above a size threshold.

    Reads ``--directory`` and ``--greater_than_size`` from the command line
    (falling back to the module-level defaults), prints one tab-separated
    line per matching file with its size in MB, then a summary line.
    """
    cli = argparse.ArgumentParser(description="Find files larger than a specified size")
    cli.add_argument(
        "--directory",
        default=root_dir,
        help="Root directory to scan (default: current directory)",
    )
    cli.add_argument(
        "--greater_than_size",
        type=int,
        default=greater_than_size,
        help="Minimum file size in bytes",
    )
    options = cli.parse_args()
    threshold = options.greater_than_size

    large_files = find_large_files(options.directory, threshold)
    for rel_path, n_bytes in large_files:
        megabytes = n_bytes / (1024 * 1024)
        print(f"{rel_path}\t{megabytes:.2f} MB")
    print(f"\nFound {len(large_files)} files larger than {threshold} bytes")


# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
import os
import subprocess
def convert_html_to_md(folder_path):
    """Convert every ``.htm`` file below *folder_path* to Markdown via Pandoc.

    Each output file is written next to its source, with the extension
    replaced by ``.md``. Progress is printed before and after each file.

    Args:
        folder_path: Directory searched recursively for ``.htm`` files.

    Raises:
        subprocess.CalledProcessError: If a ``pandoc`` invocation exits with
            a non-zero status; the walk stops at that file.
    """
    for current_dir, _subdirs, names in os.walk(folder_path):
        for name in names:
            if not name.endswith(".htm"):
                continue
            source = os.path.join(current_dir, name)
            stem, _ext = os.path.splitext(name)
            target = os.path.join(current_dir, f"{stem}.md")
            print(f"Converting: {source} to {target}")
            # Argument list (no shell), so paths with spaces need no quoting.
            command = ["pandoc", "-f", "html", "-t", "markdown", "-o", target, source]
            subprocess.run(command, check=True)
            print(f"Converted: {source} to {target}")
# Example usage: these statements run at module level, whenever the file is
# executed or imported.
folder_path = 'path_to_your_html_files' # Placeholder: replace with the path to the folder holding your .htm files
convert_html_to_md(folder_path)