Skip to content

Commit 520e088

Browse files
committed
Add main script
1 parent 2ad333a commit 520e088

File tree

1 file changed

+235
-0
lines changed

1 file changed

+235
-0
lines changed

stanford-dl.go

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
package main
2+
3+
import (
4+
"flag"
5+
"fmt"
6+
"github.com/anaskhan96/soup"
7+
"github.com/gosuri/uiprogress"
8+
"io"
9+
"net/http"
10+
"os"
11+
"strconv"
12+
"strings"
13+
"sync"
14+
)
15+
16+
// helpString is the usage text that main prints when the -help flag is set.
// The text is emitted verbatim, so every line inside the raw string literal
// is user-visible output.
const helpString = `stanford-dl
Author : https://github.com/coderick14

A dead simple script to download videos or pdfs from Stanford Engineering Everywhere.
USAGE : stanford-dl -course COURSE_CODE [-type {video|pdf}] [-all] [-lec lectures] [--help]

--course Course name e.g. CS229, EE261
--type Specify whether to download videos or pdfs. Defaults to PDF.
--all Download for all lectures
--lec Comma separated list of lectures e.g. 1,3,5,10
--help Display this help message and quit

Found a bug? Feel free to raise an issue on https://github.com/coderick14/stanford-dl
Contributions welcome :)`
30+
31+
// passThrough decorates an io.Reader with the bookkeeping needed to report
// download progress while data is streamed through it.
type passThrough struct {
	// Reader is the wrapped source that the data actually comes from.
	io.Reader

	// index selects this download's entry in the bars slice, curr is the
	// running count of bytes read so far, and total is the expected size
	// expressed in units of factor.
	index, curr, total int
}
38+
39+
var bars = make([]*uiprogress.Bar, 0, 50)
40+
var factor int64
41+
42+
// Override Read method of io.Reader
43+
func (pt *passThrough) Read(p []byte) (int, error) {
44+
n, err := pt.Reader.Read(p)
45+
pt.curr += n
46+
47+
if err == nil || (err == io.EOF && n > 0) {
48+
bars[pt.index].Set(int((float64(pt.curr/int(factor)) / float64(pt.total)) * float64(pt.total)))
49+
}
50+
51+
return n, err
52+
}
53+
54+
// Goroutine to download a lecture
55+
func downloadLecture(index int, url string, fileName string, wg *sync.WaitGroup) {
56+
defer wg.Done()
57+
58+
// Send GET request to required url
59+
resp, err := http.Get(url)
60+
61+
if err != nil || resp.StatusCode != http.StatusOK {
62+
fmt.Println("Error while downloading", fileName)
63+
return
64+
}
65+
66+
defer resp.Body.Close()
67+
68+
// Open file for writing
69+
fh, err := os.Create(fileName)
70+
71+
if err != nil {
72+
fmt.Println("Error while creating file", fileName)
73+
return
74+
}
75+
76+
defer fh.Close()
77+
78+
// Initialize the progress bar for this lecture
79+
bars[index] = uiprogress.AddBar(int(resp.ContentLength / factor)).AppendCompleted()
80+
bars[index].PrependFunc(func(b *uiprogress.Bar) string {
81+
return "Downloading " + fileName
82+
})
83+
84+
// Create wrapper over io.Reader
85+
src := &passThrough{Reader: resp.Body, total: int(resp.ContentLength / factor), index: index}
86+
_, err = io.Copy(fh, src)
87+
88+
if err != nil {
89+
bars[index].AppendFunc(func(b *uiprogress.Bar) string {
90+
return "Failed"
91+
})
92+
return
93+
}
94+
95+
// Finished downloading
96+
bars[index].AppendFunc(func(b *uiprogress.Bar) string {
97+
return "Completed"
98+
})
99+
}
100+
101+
// makeRange returns the integers 1 through n in ascending order.
// A zero or negative n yields an empty slice instead of letting make panic
// on a negative length.
func makeRange(n int) []int {
	if n <= 0 {
		return []int{}
	}

	list := make([]int, n)
	for i := range list {
		list[i] = i + 1
	}

	return list
}
111+
112+
// Utility function to return a list of formatted lecture ids
113+
func createLectureList(all bool, lectures string, lectureCount int) []int {
114+
var lectureList []int
115+
116+
if all {
117+
lectureList = makeRange(lectureCount)
118+
} else {
119+
tempList := strings.Split(lectures, ",")
120+
for _, num := range tempList {
121+
val, _ := strconv.Atoi(num)
122+
lectureList = append(lectureList, val)
123+
}
124+
}
125+
126+
return lectureList
127+
}
128+
129+
func main() {
130+
131+
// Define flags and base URLs
132+
var (
133+
help = flag.Bool("help", false, "Display help")
134+
courseName = flag.String("course", "", "Course name e.g. CS229, EE261")
135+
typeFlag = flag.String("type", "pdf", "[video | pdf]. Defaults to pdf.")
136+
all = flag.Bool("all", false, "Download material for all lectures for the given course")
137+
lectures = flag.String("lec", "", "Specify comma separated list of lectures e.g 1,3,10")
138+
siteBaseURL = "https://see.stanford.edu"
139+
courseBaseURL = "https://see.stanford.edu/Course/"
140+
videoBaseURL = "http://html5.stanford.edu/videos/courses/see/"
141+
)
142+
143+
// Parse the command line flags
144+
flag.Parse()
145+
146+
// Display help and quit
147+
if *help == true {
148+
fmt.Println(helpString)
149+
return
150+
}
151+
152+
// Check for required -course flag
153+
if len(*courseName) == 0 {
154+
fmt.Println("Please specify a Course code")
155+
return
156+
}
157+
158+
// Check for valid value for -type flag
159+
if strings.Compare("pdf", *typeFlag) != 0 && strings.Compare("video", *typeFlag) != 0 {
160+
fmt.Println("[video | pdf] are the only accepted values for -type flag")
161+
return
162+
}
163+
164+
courseURL := courseBaseURL + *courseName
165+
166+
// Get HTML content of the course page
167+
resp, err := soup.Get(courseURL)
168+
169+
if err != nil {
170+
fmt.Println("Error fetching course details. Check your internet connection!!")
171+
return
172+
}
173+
174+
// Parse HTML content of course page
175+
doc := soup.HTMLParse(resp)
176+
177+
// Set BaseURL, path and file extension
178+
var (
179+
baseURL, extension string
180+
paths []string
181+
linkParentTags []soup.Root
182+
lectureCount int
183+
lectureList []int
184+
)
185+
186+
if strings.Compare(*typeFlag, "video") == 0 {
187+
// For Videos
188+
baseURL = videoBaseURL
189+
extension = "mp4"
190+
linkParentTags = doc.FindAll("table", "class", "table")
191+
lectureList = createLectureList(*all, *lectures, len(linkParentTags))
192+
lectureCount = len(lectureList)
193+
factor = 1000000
194+
for i := 0; i < lectureCount; i++ {
195+
paths = append(paths, fmt.Sprintf("%s/%s-lecture%02d.%s", *courseName, *courseName, lectureList[i], extension))
196+
}
197+
198+
} else {
199+
// For PDFs
200+
baseURL = siteBaseURL
201+
extension = "pdf"
202+
linkParentTags = doc.FindAll("ul", "class", "list-inline")
203+
lectureList = createLectureList(*all, *lectures, len(linkParentTags))
204+
lectureCount = len(lectureList)
205+
factor = 1000
206+
for i := 0; i < lectureCount; i++ {
207+
lectureId := lectureList[i]
208+
href := linkParentTags[lectureId-1].FindAll("a")[1].Attrs()["href"]
209+
paths = append(paths, href)
210+
}
211+
212+
}
213+
214+
// Resize the progress bar array
215+
bars = bars[:lectureCount]
216+
217+
fmt.Printf("Found %d lectures for course %s\n", lectureCount, *courseName)
218+
var wg sync.WaitGroup
219+
220+
// Listen for download progresses
221+
uiprogress.Start()
222+
223+
for i := 0; i < lectureCount; i++ {
224+
url := baseURL + paths[i]
225+
226+
fileName := fmt.Sprintf("%s-lecture%02d.%s", *courseName, lectureList[i], extension)
227+
228+
// fetch lecture concurrently
229+
wg.Add(1)
230+
go downloadLecture(i, url, fileName, &wg)
231+
}
232+
233+
// Wait for all lectures to be downloaded
234+
wg.Wait()
235+
}

0 commit comments

Comments
 (0)