Final_Assignment_Template

Build error

File size: 10,313 Bytes

45b200f

{
 "cells": [
  {
   "cell_type": "code",
   "id": "6369bdabdf59b658",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:28:20.833977Z",
     "start_time": "2025-06-10T20:28:14.932967Z"
    }
   },
   "source": [
    "from langchain.document_loaders import YoutubeLoader\n",
    "from langchain_yt_dlp.youtube_loader import YoutubeLoaderDL\n",
    "from globals import *\n",
    "import torch\n",
    "import torchaudio.transforms as T\n",
    "import pydub\n",
    "import numpy as np\n",
    "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor"
   ],
   "outputs": [],
   "execution_count": 33
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:05:04.414620Z",
     "start_time": "2025-06-10T20:05:04.412354Z"
    }
   },
   "cell_type": "code",
   "source": "url = \"https://www.youtube.com/watch?v=1htKBjuUWec\"\n",
   "id": "666e521f8ecf3f47",
   "outputs": [],
   "execution_count": 14
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:26:57.491908Z",
     "start_time": "2025-06-10T20:26:57.489481Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Load transcript as LangChain Documents\n",
    "# loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)\n",
    "# loader = YoutubeLoaderDL.from_youtube_url(\n",
    "#     url, add_video_info=True\n",
    "# )\n",
    "# docs = loader.load()\n",
    "#\n",
    "# # Print the transcript content\n",
    "# for doc in docs:\n",
    "#     print(doc.page_content)\n",
    "\n",
    "# Optionally, save to a file\n",
    "# with open(\"transcript.txt\", \"w\", encoding=\"utf-8\") as f:\n",
    "#     for doc in docs:\n",
    "#         f.write(doc.page_content)"
   ],
   "id": "initial_id",
   "outputs": [],
   "execution_count": 30
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:27:03.519450Z",
     "start_time": "2025-06-10T20:27:03.517474Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# from pytube import YouTube\n",
    "#\n",
    "# yt = YouTube(url)\n",
    "#\n",
    "# # Download the audio stream (usually mp4)\n",
    "# stream = yt.streams.filter(only_audio=True).first()\n",
    "# stream.download(firstilename=f\"{yt.title}.mp3\")"
   ],
   "id": "ec4885c3a15d9a2b",
   "outputs": [],
   "execution_count": 31
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:04:18.601366Z",
     "start_time": "2025-06-10T20:04:18.597488Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import ssl\n",
    "import certifi\n",
    "# Correct: assign a lambda (function) that returns a properly configured SSL context\n",
    "ssl._create_default_https_context = lambda: ssl.create_default_context(cafile=certifi.where())"
   ],
   "id": "167af702547c15e4",
   "outputs": [],
   "execution_count": 12
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:07:53.667018Z",
     "start_time": "2025-06-10T20:07:52.627871Z"
    }
   },
   "cell_type": "code",
   "source": [
    "\n",
    "from pytube import YouTube\n",
    "\n",
    "def download_video(url, output_path='.'):\n",
    "    try:\n",
    "        yt = YouTube(url)\n",
    "        print('here')\n",
    "        stream = yt.streams.get_highest_resolution()\n",
    "        print(f\"Downloading: {yt.title}\")\n",
    "        stream.download(output_path=output_path)\n",
    "        print(\"Download completed.\")\n",
    "    except Exception as e:\n",
    "        print(f\"Error: {e}\")\n",
    "\n",
    "# Example usage\n",
    "download_video(url)"
   ],
   "id": "289b9a4321ea487b",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "here\n",
      "Error: HTTP Error 400: Bad Request\n"
     ]
    }
   ],
   "execution_count": 23
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:08:47.590897Z",
     "start_time": "2025-06-10T20:08:44.115350Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import yt_dlp\n",
    "\n",
    "ydl_opts = {\n",
    "    'format': 'best',  # or 'bestvideo+bestaudio'\n",
    "    'outtmpl': '%(title)s.%(ext)s',  # save as video title\n",
    "}\n",
    "\n",
    "with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
    "    ydl.download([url])"
   ],
   "id": "4eb045792318e67a",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[youtube] Extracting URL: https://www.youtube.com/watch?v=1htKBjuUWec\n",
      "[youtube] 1htKBjuUWec: Downloading webpage\n",
      "[youtube] 1htKBjuUWec: Downloading tv client config\n",
      "[youtube] 1htKBjuUWec: Downloading tv player API JSON\n",
      "[youtube] 1htKBjuUWec: Downloading ios player API JSON\n",
      "[youtube] 1htKBjuUWec: Downloading m3u8 information\n",
      "[info] 1htKBjuUWec: Downloading 1 format(s): 18\n",
      "[download] Destination: Teal'c coffee first time.mp4\n",
      "[download] 100% of    1.19MiB in 00:00:01 at 1.09MiB/s   \n"
     ]
    }
   ],
   "execution_count": 24
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:22:32.706482Z",
     "start_time": "2025-06-10T20:22:29.843517Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import yt_dlp\n",
    "\n",
    "file_name = 'my_audio_file'\n",
    "ydl_opts = {\n",
    "    'format': 'bestaudio/best',\n",
    "    'outtmpl': f'files/{file_name}.%(ext)s',  # <-- set your custom filename here\n",
    "    'postprocessors': [{\n",
    "        'key': 'FFmpegExtractAudio',\n",
    "        'preferredcodec': 'mp3',\n",
    "        'preferredquality': '192',\n",
    "    }],\n",
    "}\n",
    "\n",
    "with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
    "    ydl.download([url])"
   ],
   "id": "68b51ca78254d8f",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[youtube] Extracting URL: https://www.youtube.com/watch?v=1htKBjuUWec\n",
      "[youtube] 1htKBjuUWec: Downloading webpage\n",
      "[youtube] 1htKBjuUWec: Downloading tv client config\n",
      "[youtube] 1htKBjuUWec: Downloading tv player API JSON\n",
      "[youtube] 1htKBjuUWec: Downloading ios player API JSON\n",
      "[youtube] 1htKBjuUWec: Downloading m3u8 information\n",
      "[info] 1htKBjuUWec: Downloading 1 format(s): 251\n",
      "[download] Destination: files/my_audio_file.webm\n",
      "[download] 100% of  444.93KiB in 00:00:00 at 3.41MiB/s     \n",
      "[ExtractAudio] Destination: files/my_audio_file.mp3\n",
      "Deleting original file files/my_audio_file.webm (pass -k to keep)\n"
     ]
    }
   ],
   "execution_count": 26
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:35:23.558866Z",
     "start_time": "2025-06-10T20:35:23.515883Z"
    }
   },
   "cell_type": "code",
   "source": [
    "audio_model_dir = './models_for_proj/wav2vec2-base-960h'\n",
    "model = Wav2Vec2ForCTC.from_pretrained(audio_model_dir)\n",
    "processor = Wav2Vec2Processor.from_pretrained(audio_model_dir)\n",
    "\n",
    "def read_mp3(f, normalized=False):\n",
    "    \"\"\"Read MP3 file to numpy array.\"\"\"\n",
    "    a = pydub.AudioSegment.from_mp3(f)\n",
    "    y = np.array(a.get_array_of_samples())\n",
    "    if a.channels == 2:\n",
    "        y = y.reshape((-1, 2))\n",
    "        # y = y.mean(axis=1)\n",
    "        y = y[:,1]\n",
    "    if normalized:\n",
    "        return a.frame_rate, np.float32(y) / 2**15\n",
    "    else:\n",
    "        return a.frame_rate, y\n",
    "\n",
    "def describe_audio_tool(file_name: str) -> str:\n",
    "    \"\"\"\n",
    "    This tool receives a file name of an audio, uploads the audio and returns a detailed description of the audio.\n",
    "    Inputs: file_name as str\n",
    "    Outputs: audio detailed description as str\n",
    "    \"\"\"\n",
    "    # --------------------------------------------------------------------------- #\n",
    "    file_dir = f'files/{file_name}'\n",
    "    print(f\"{file_dir=}\")\n",
    "    audio_input_sr, audio_input_np = read_mp3(file_dir)\n",
    "    audio_input_t = torch.tensor(audio_input_np, dtype=torch.float32)\n",
    "    target_sr = 16000\n",
    "    resampler = T.Resample(audio_input_sr, target_sr, dtype=audio_input_t.dtype)\n",
    "    resampled_audio_input_t: torch.Tensor = resampler(audio_input_t)\n",
    "    resampled_audio_input_np = resampled_audio_input_t.numpy()\n",
    "    # --------------------------------------------------------------------------- #\n",
    "    inputs = processor(resampled_audio_input_np, sampling_rate=16000, return_tensors=\"pt\", padding=True)\n",
    "    # Inference\n",
    "    with torch.no_grad():\n",
    "        logits = model(**inputs).logits\n",
    "    # Decode\n",
    "    predicted_ids = torch.argmax(logits, dim=-1)\n",
    "    transcription = processor.decode(predicted_ids[0])\n",
    "    return transcription"
   ],
   "id": "64f438af2b38765f",
   "outputs": [],
   "execution_count": 43
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-06-10T20:35:27.235493Z",
     "start_time": "2025-06-10T20:35:26.202459Z"
    }
   },
   "cell_type": "code",
   "source": "describe_audio_tool(file_name=f'{file_name}.mp3')",
   "id": "b4a6ae10e1cbbcae",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "file_dir='files/my_audio_file.mp3'\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"ALIS COFFEE'S GRAY WO IS JUST THINKING  YET HE TAT SOMESCHICKERY A CHIC TEK H IS NOT HOT EXTREMELY\""
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 44
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "",
   "id": "ce9aaf764346b7e4"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}