Skip Navigation
bash

bash

  • Bash script to download and search youtube subtitles and output clickable timestamped urls

    Here is the script.

    ```bash

    #!/usr/bin/env bash

    # Download and search YouTube subtitles, printing clickable timestamped URLs.
    #
    # Dependencies: yt-dlp, awk, perl, sed, and at least one of ugrep, ripgrep, or grep.
    #
    # Usage: script youtube_url

    #######################################
    # Entry point: runs the whole download / format / search pipeline.
    # Globals:   url (written); every step below reads and writes further
    #            globals (video_id, transcript_file, description_file, ...).
    # Arguments: $1 - the YouTube URL to process (exactly one argument).
    # Outputs:   usage message on stderr when the argument count is wrong.
    # Returns:   exits 1 on bad usage or when a prerequisite step fails.
    #######################################
    main() {
      if (($# != 1)); then
        printf 'usage: %s youtube_url\n' "${0##*/}" >&2
        exit 1
      fi
      url="$1"

      check_if_url
      # Without a video id the file lookup and the generated links are useless.
      get_video_id || exit 1
      search_for_downloaded_matching_files
      set_download_boolean_flag
      download_subs
      # Nothing to show or search when the transcript could not be prepared.
      read_and_format_transcript_file || exit 1
      echo_description_file
      user_search
    }

    #######################################
    # Validates that the global url is a single https URL without whitespace.
    # Globals:   url (read)
    # Outputs:   error message on stderr when the URL is rejected.
    # Returns:   exits the script with status 1 on invalid input.
    #######################################
    check_if_url() {
      # Anchored on both ends so the whole argument must be one https URL.
      local regex='^https://[^[:space:]]+$'
      if ! [[ "${url:-}" =~ $regex ]]; then
        echo "Invalid input. Valid input is a url matching regex ${regex}" >&2
        exit 1
      fi
    }

    #######################################
    # Extracts the video id from the global url.
    # Handles the v= query parameter as well as youtu.be/ID and
    # youtube.com/{shorts,embed,live}/ID style links.
    # Globals:   url (read), video_id (written; empty when nothing matched)
    # Outputs:   error message on stderr when no id can be found.
    # Returns:   0 when an id was extracted, 1 otherwise.
    #######################################
    get_video_id() {
      # Patterns live in variables so bash treats them as regexes, not literals.
      local watch_re='[?&]v=([^&#]+)'
      local path_re='(youtu\.be|youtube\.com/(shorts|embed|live))/([^?&#/]+)'

      video_id=""
      if [[ "${url:-}" =~ $watch_re ]]; then
        video_id="${BASH_REMATCH[1]}"
      elif [[ "${url:-}" =~ $path_re ]]; then
        video_id="${BASH_REMATCH[3]}"
      else
        printf 'Could not find a video id in url: %s\n' "${url:-}" >&2
        return 1
      fi
    }

    #######################################
    # Prints the newest file in $PWD whose name contains the video id and ends
    # in the given extension; prints nothing when no such file exists.
    # Globals:   video_id (read), PWD (read)
    # Arguments: $1 - file extension without the leading dot
    # Outputs:   the matching path on stdout
    #######################################
    _newest_download_for_video_id() {
      local extension="$1"
      local -a matches
      local listing

      # An empty id would turn the glob into "any file with this extension".
      [[ -n "${video_id:-}" ]] || return 0

      # Only the asterisks are glob characters; id and directory stay literal.
      matches=("$PWD"/*"${video_id}"*."${extension}")
      # An unmatched glob is left as the literal pattern, which does not exist.
      [[ -e "${matches[0]}" ]] || return 0

      # Sort by creation time as the script originally did; fall back to the
      # default -t ordering where this ls does not accept --time=creation.
      listing="$(command ls -t --time=creation -- "${matches[@]}" 2>/dev/null)" ||
        listing="$(command ls -t -- "${matches[@]}" 2>/dev/null)"

      # Keep only the first (newest) line of the listing.
      printf '%s\n' "${listing%%$'\n'*}"
    }

    #######################################
    # Finds the newest already-downloaded subtitle and description files for
    # the current video in the working directory.
    # Globals:   video_id (read)
    #            transcript_file (written; empty when no .vtt matches)
    #            description_file (written; empty when no .description matches)
    #######################################
    search_for_downloaded_matching_files() {
      transcript_file="$(_newest_download_for_video_id vtt)"
      description_file="$(_newest_download_for_video_id description)"
    }

    #######################################
    # Decides whether anything has to be downloaded.
    # Globals:   transcript_file (read), description_file (read)
    #            download (written): 0 when both files were found,
    #            1 when at least one of them is missing.
    #######################################
    set_download_boolean_flag() {
      if [[ -n "${transcript_file:-}" && -n "${description_file:-}" ]]; then
        download=0 # FALSE: both files already exist locally
      else
        download=1 # TRUE: at least one file is missing
      fi
    }

    #######################################
    # Downloads subtitles and the description with yt-dlp when the download
    # flag is set, then re-scans the working directory for the new files.
    # Globals:   download (read), url (read); transcript_file and
    #            description_file are refreshed through the re-scan.
    # Outputs:   whatever yt-dlp prints.
    #######################################
    download_subs() {
      if ((download == 1)); then
        # Three separate runs: auto-generated subs, uploaded "eng" subs, and
        # the description. None of them downloads the media itself.
        yt-dlp --restrict-filenames --write-auto-sub --skip-download "${url}"
        yt-dlp --restrict-filenames --sub-langs=eng --write-subs --skip-download "${url}"
        yt-dlp --restrict-filenames --write-description --skip-download "${url}"
        # Search files again since they were just downloaded
        search_for_downloaded_matching_files
      fi
    }

    read_and_format_transcript_file() { perl_removed_dupes="$(perl -0777 -pe 's/^\d\d.\n.\n.*<\/c>//gm' <"${transcript_file}")" local prefix="https://www.youtube.com/watch?v=${video_id}&t=" local suffix="s" formated_transcript_file="$(awk -v pre="$prefix" -v suf="$suffix" ' /^([0-9]{2}:){2}[0-9]{2}\.[0-9]{3}/ { split($1, a, /[:.]/); $1 = pre (int(a[1]*3600 + a[2]*60 + a[3]) - 3) suf; sub(/ --> [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{3}/, ""); sub(/ align:start position:0%$/, ""); print; next; } { sub(/ align:start position:0%$/, ""); print; } ' <<<"${perl_removed_dupes}")" #CRLF for ugrep to avoid ?bug? where before lines are not all outputted formated_transcript_file_CRLF=$(printf '%b' "$formated_transcript_file" | sed 's/$/\r/') }

    #######################################
    # Prints the downloaded video description.
    # Globals:   description_file (read)
    # Outputs:   the file contents on stdout; an error on stderr when the
    #            description file is missing.
    # Returns:   1 when there is no description file to print.
    #######################################
    echo_description_file() {
      if [[ ! -f "${description_file:-}" ]]; then
        printf 'No description file found\n' >&2
        return 1
      fi
      cat -- "${description_file}"
    }

    #######################################
    # Prompts for a regex and searches the formatted transcript with every
    # available tool: ugrep and ripgrep when installed, plain grep only when
    # neither of them is.
    # Globals:   formated_transcript_file (read)
    #            formated_transcript_file_CRLF (read, ugrep only)
    # Inputs:    one line from stdin, taken as the raw search pattern.
    # Outputs:   search results on stdout; an error on stderr for an empty
    #            pattern.
    # Returns:   1 when no pattern was entered.
    #######################################
    user_search() {
      local search_term
      # Local so an inherited app_count cannot suppress the grep fallback.
      local app_count=0

      printf '\n\n\n'
      read -rp "Enter regex (read as raw input): " search_term

      # An empty pattern would match every line and dump the whole transcript.
      if [[ -z "${search_term}" ]]; then
        printf 'No search pattern entered\n' >&2
        return 1
      fi

      if command -v ug >/dev/null 2>&1; then
        printf '\n\n\n\n\n'
        echo "Ugrep output"
        # --andnot drops matches that sit on the generated URL lines.
        ug --pretty=never -B2 -A1 -i -Z+-~1 -e "${search_term}" --andnot "^https?:\/\/" \
          <<<"$formated_transcript_file_CRLF"
        app_count=$((app_count + 1))
      fi

      if command -v rg >/dev/null 2>&1; then
        printf '\n\n\n\n\n'
        echo "Ripgrep output"
        # The negative lookahead skips URL lines; \K keeps only the user match.
        rg -iP -B2 -A7 "^(?!https?:\/\/).*\K${search_term}" \
          <<<"$formated_transcript_file"
        app_count=$((app_count + 1))
      fi

      if ((app_count == 0)); then
        printf '\n\n\n\n\n'
        echo "Grep output"
        # -e keeps a pattern that starts with a dash from being read as an option.
        grep -iP -B2 -A1 -e "${search_term}" <<<"$formated_transcript_file"
        printf '\n\n\n'
        echo "Consider installing ripgrep and ugrep for better search"
        app_count=$((app_count + 1))
      fi
    }

    # Script entry point: hand every command-line argument to main.
    main "$@"

    ```

    0
1 Active user