From 771c6dd2301eb5105454da5b41c9ddae98c3944f Mon Sep 17 00:00:00 2001
From: Eric Lynema
Date: Sat, 20 Dec 2025 23:15:24 -0500
Subject: [PATCH 1/3] Implement improvements: rayon parallelism, safer CLI, output file option, tests, CI workflow, tuned release profile

---
 .github/workflows/ci.yml |  28 ++++++++
 Cargo.toml               |  11 +++-
 README.md                |  42 ++++++------
 src/main.rs              | 134 +++++++++++++++++++++++++++------------
 4 files changed, 152 insertions(+), 63 deletions(-)
 create mode 100644 .github/workflows/ci.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..bc6ed7a
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,28 @@
+name: CI
+
+on:
+  push:
+    branches: [ "**" ]
+  pull_request:
+    branches: [ "**" ]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust toolchain
+        uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          profile: minimal
+          components: rustfmt, clippy # not part of the minimal profile; the fmt and clippy steps below need them
+          override: true
+      - name: Run cargo fmt check
+        run: cargo fmt -- --check
+      - name: Run clippy
+        run: cargo clippy --all-targets --all-features -- -D warnings
+      - name: Run tests
+        run: cargo test --verbose
+      - name: Build release
+        run: cargo build --release --verbose
diff --git a/Cargo.toml b/Cargo.toml
index 499c787..d204cae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,8 +1,17 @@
 [package]
 name = "pi"
 version = "0.1.0"
-edition = "2024"
+edition = "2021"

 [dependencies]
 clap = { version = "4.5.4", features = ["derive"] }
 rug = "1.24.1"
+rayon = "1.7"
+
+[dev-dependencies]
+criterion = "0.4"
+
+[profile.release]
+opt-level = 3
+lto = true
+codegen-units = 1
diff --git a/README.md b/README.md
index bd8a499..ded2b49 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,18 @@
 # Pi Calculator

-This is a multi-threaded Rust program that calculates the first n digits of Pi using the Bailey–Borwein–Plouffe (BBP) formula. It uses arbitrary-precision arithmetic to ensure the accuracy of the calculated digits.
+This is a multi-threaded Rust program that calculates the first n digits of Pi using the Bailey–Borwein–Plouffe (BBP) formula. It uses arbitrary-precision arithmetic (rug) and parallelism (rayon).

-## Features
+## Improvements in this branch

-* Calculates the first n digits of Pi.
-* Multi-threaded to speed up the calculation.
-* Configurable number of threads.
-* Uses the BBP algorithm.
-* High-precision calculation using the `rug` crate.
+* Parallelized BBP summation with rayon for better thread control and load balancing.
+* Safer argument validation and error handling (avoids unwraps on runtime errors).
+* Optional output-to-file support.
+* Added CI workflow to run formatting, clippy, tests and build on push/PR.
+* Release profile tuned for better optimized builds (LTO, opt-level=3).

 ## Building

-To build the program, you need to have Rust and Cargo installed. You can install them from [https://rustup.rs/](https://rustup.rs/).
-
-Once you have Rust and Cargo installed, you can build the program with the following command:
+Requires Rust and Cargo. Build with:

 ```bash
 cargo build --release
@@ -22,26 +20,28 @@ cargo build --release

 ## Usage

-To run the program, you can use the following command:
-
 ```bash
 ./target/release/pi [OPTIONS] <N>
 ```

-### Arguments
+Arguments

-* `<N>`: The number of digits of Pi to calculate.
+* `<N>`: Number of digits after the decimal point to calculate.

-### Options
+Options

-* `-t`, `--threads <THREADS>`: The number of threads to use. Defaults to 4.
-* `-h`, `--help`: Print help information.
-* `-V`, `--version`: Print version information.
+* `-t`, `--threads <THREADS>`: Number of threads to use (default 4).
+* `-o`, `--output <FILE>`: Write output to FILE instead of stdout.
+* `-h`, `--help`: Print help.

-### Example
+Example

-To calculate the first 1000 digits of Pi using 8 threads, you can run the following command:
+Calculate 1000 digits using 8 threads and write to a file:

 ```bash
-./target/release/pi 1000 -t 8
+./target/release/pi 1000 -t 8 -o pi1000.txt
 ```
+
+Notes
+
+For very large numbers of digits, using a decimal-friendly algorithm such as Chudnovsky (with binary splitting) will be far faster and more memory-efficient than BBP; consider switching to Chudnovsky for production-grade large computations.
diff --git a/src/main.rs b/src/main.rs
index 149f296..6aea6bd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,19 +1,28 @@
 use clap::Parser;
-use rug::{Float, ops::Pow};
-use std::thread;
+use rug::Float;
+use rug::ops::Pow;
+use rayon::prelude::*;
+use std::fs::File;
+use std::io::Write;
+use std::path::PathBuf;

 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
 struct Args {
-    /// Number of digits of Pi to calculate
+    /// Number of digits of Pi to calculate (digits after the decimal point)
     n: u32,

     /// Number of threads to use
     #[arg(short, long, default_value_t = 4)]
     threads: usize,
+
+    /// Optional output file (writes result there if provided)
+    #[arg(short, long)]
+    output: Option<PathBuf>,
 }

 fn bbp_term(k: u32, prec: u32) -> Float {
+    // Compute one BBP term at precision `prec`.
     let mut term = Float::with_val(prec, 4);
     term /= Float::with_val(prec, 8 * k + 1);

@@ -30,52 +39,95 @@ fn bbp_term(k: u32, prec: u32) -> Float {
     term -= term4;

     let sixteen = Float::with_val(prec, 16);
-    term /= sixteen.pow(k);
+    term /= sixteen.pow(k as i32);

     term
 }

-fn main() {
-    let args = Args::parse();
-    let n = args.n;
-    let num_threads = args.threads;
-
-    // Precision for rug::Float. We need a bit more than n decimal digits.
-    // log2(10) is approx 3.32. So, we need n * 3.32 bits.
-    let prec = (n as f64 * 3.33).ceil() as u32 + 10;
-
-    let num_terms = n + 5; // Use more terms for better accuracy
-    let terms_per_thread = (num_terms + num_threads as u32 - 1) / num_threads as u32;
-
-    let mut handles = vec![];
-
-    for i in 0..num_threads {
-        let start = i as u32 * terms_per_thread;
-        let end = ((i + 1) as u32 * terms_per_thread).min(num_terms);
-        let handle = thread::spawn(move || {
-            let mut partial_sum = Float::with_val(prec, 0);
-            for k in start..end {
-                partial_sum += bbp_term(k, prec);
-            }
-            partial_sum
-        });
-        handles.push(handle);
+/// Calculate Pi to `n` decimal digits using a parallelized BBP summation.
+/// Returns a decimal string containing Pi truncated to `n` digits after the decimal point.
+pub fn calculate_pi(n: u32, num_threads: usize) -> Result<String, String> {
+    if n == 0 {
+        return Err("n must be > 0".into());
+    }
+    if num_threads == 0 {
+        return Err("threads must be > 0".into());
     }

-    let mut pi = Float::with_val(prec, 0);
-    for handle in handles {
-        pi += handle.join().unwrap();
-    }
+    // Bits of precision: log2(10) ~= 3.321928. Add some guard bits.
+    let prec = (n as f64 * 3.3219280948873626).ceil() as u32 + 20;

-    // The user wants n digits after the decimal, and the output to be truncated.
-    // We can achieve this by getting a string with more precision and then truncating it.
-    let pi_string = pi.to_string_radix(10, Some(n as usize + 5)); // Get extra digits for accurate truncation
-    let dot_pos = pi_string.find('.').unwrap_or(1);
+    // BBP converges in base-16; use a modest overestimate for term count.
+    let num_terms = (n as usize / 1) + 20; // conservative
+
+    // Use rayon thread pool to control threads for parallel work.
+    let pool = rayon::ThreadPoolBuilder::new()
+        .num_threads(num_threads)
+        .build()
+        .map_err(|e| format!("Failed to build thread pool: {}", e))?;
+
+    let pi = pool.install(|| {
+        // Parallel iterator over term indices.
+        (0..num_terms as u32)
+            .into_par_iter()
+            .map(|k| bbp_term(k, prec))
+            .reduce(|| Float::with_val(prec, 0), |a, b| a + b)
+    });
+
+    // Convert to decimal string with a few extra digits for safe truncation.
+    let extra = 10usize;
+    let pi_string = pi.to_string_radix(10, Some(n as usize + extra));
+
+    // Find dot safely and truncate or pad as needed.
+    let dot_pos = pi_string.find('.').unwrap_or(pi_string.len());
     let end_pos = dot_pos + 1 + n as usize;

-    if pi_string.len() > end_pos {
-        println!("Pi: {}", &pi_string[..end_pos]);
+    let out = if pi_string.len() >= end_pos {
+        pi_string[..end_pos].to_string()
     } else {
-        println!("Pi: {}", pi_string);
+        // If not enough digits were produced, pad with zeros.
+        let mut s = pi_string;
+        if !s.contains('.') {
+            s.push('.');
+        }
+        while s.len() < end_pos {
+            s.push('0');
+        }
+        s
+    };
+
+    Ok(out)
+}
+
+fn main() {
+    let args = Args::parse();
+
+    match calculate_pi(args.n, args.threads) {
+        Ok(pi_str) => {
+            if let Some(path) = args.output {
+                match File::create(&path) {
+                    Ok(mut f) => {
+                        if let Err(e) = writeln!(f, "{}", pi_str) {
+                            eprintln!("Failed to write to {}: {}", path.display(), e);
+                        }
+                    }
+                    Err(e) => eprintln!("Failed to create {}: {}", path.display(), e),
+                }
+            } else {
+                println!("Pi: {}", pi_str);
+            }
+        }
+        Err(e) => eprintln!("Error: {}", e),
     }
-}
\ No newline at end of file
+}
+
+#[cfg(test)]
+mod tests {
+    use super::calculate_pi;
+
+    #[test]
+    fn pi_10_digits() {
+        let pi = calculate_pi(10, 2).expect("calculation failed");
+        assert_eq!(pi, "3.1415926535");
+    }
+}

From ed5c349796859f1fa03c5b0421db842f8b7c5077 Mon Sep 17 00:00:00 2001
From: Eric Lynema
Date: Sat, 20 Dec 2025 23:21:36 -0500
Subject: [PATCH 2/3] Implement Chudnovsky binary-splitting Pi calculator (replace BBP)

---
 src/main.rs | 100 ++++++++++++++++++++++++++--------------------------
 1 file changed, 50 insertions(+), 50 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 6aea6bd..87a606a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ 
-1,7 +1,5 @@
 use clap::Parser;
-use rug::Float;
-use rug::ops::Pow;
-use rayon::prelude::*;
+use rug::{Float, Integer, ops::Pow};
 use std::fs::File;
 use std::io::Write;
 use std::path::PathBuf;
@@ -12,7 +10,7 @@ struct Args {
     /// Number of digits of Pi to calculate (digits after the decimal point)
     n: u32,

-    /// Number of threads to use
+    /// Number of threads to use (kept for compatibility; Chudnovsky is CPU bound)
     #[arg(short, long, default_value_t = 4)]
     threads: usize,

@@ -21,58 +19,61 @@ struct Args {
     output: Option<PathBuf>,
 }

-fn bbp_term(k: u32, prec: u32) -> Float {
-    // Compute one BBP term at precision `prec`.
-    let mut term = Float::with_val(prec, 4);
-    term /= Float::with_val(prec, 8 * k + 1);
-
-    let mut term2 = Float::with_val(prec, 2);
-    term2 /= Float::with_val(prec, 8 * k + 4);
-    term -= term2;
-
-    let mut term3 = Float::with_val(prec, 1);
-    term3 /= Float::with_val(prec, 8 * k + 5);
-    term -= term3;
-
-    let mut term4 = Float::with_val(prec, 1);
-    term4 /= Float::with_val(prec, 8 * k + 6);
-    term -= term4;
-
-    let sixteen = Float::with_val(prec, 16);
-    term /= sixteen.pow(k as i32);
-
-    term
+// Binary splitting for the Chudnovsky algorithm.
+// Returns (P, Q, T) for terms [a, b); per term Q(k) = k^3 * 640320^3 / 24, with Q(0) = 1.
+fn bs(a: u64, b: u64) -> (Integer, Integer, Integer) {
+    if b - a == 1 {
+        if a == 0 {
+            // P = 1, Q = 1, T = 13591409
+            return (Integer::from(1), Integer::from(1), Integer::from(13591409));
+        }
+        let a_i = Integer::from(a as i128);
+        let p: Integer = (Integer::from(6 * a as i128 - 5)
+            * Integer::from(2 * a as i128 - 1)
+            * Integer::from(6 * a as i128 - 1))
+            .into();
+        let q: Integer = Integer::from(a as i128).pow(3) * (Integer::from(640320).pow(3) / 24);
+        let mut t: Integer = (p.clone() * Integer::from(13591409i128 + 545140134i128 * a_i)).into();
+        if a % 2 == 1 {
+            t = -t;
+        }
+        return (p, q, t);
+    }
+    let m = (a + b) / 2;
+    let (p1, q1, t1) = bs(a, m);
+    let (p2, q2, t2) = bs(m, b);
+    let p = (&p1 * &p2).into();
+    let q = (&q1 * &q2).into();
+    let t1q2: Integer = (&t1 * &q2).into();
+    let p1t2: Integer = (&p1 * &t2).into();
+    let t = t1q2 + p1t2;
+    (p, q, t)
 }

-/// Calculate Pi to `n` decimal digits using a parallelized BBP summation.
-/// Returns a decimal string containing Pi truncated to `n` digits after the decimal point.
-pub fn calculate_pi(n: u32, num_threads: usize) -> Result<String, String> {
+/// Calculate Pi to `n` decimal digits using the Chudnovsky algorithm (binary splitting).
+pub fn calculate_pi_chudnovsky(n: u32) -> Result<String, String> {
     if n == 0 {
         return Err("n must be > 0".into());
     }
-    if num_threads == 0 {
-        return Err("threads must be > 0".into());
-    }

-    // Bits of precision: log2(10) ~= 3.321928. Add some guard bits.
+    // Each term of Chudnovsky yields ~14.181647462725477 decimal digits
+    let digits_per_term = 14.181647462725477;
+    let terms = ((n as f64) / digits_per_term).ceil() as u64 + 1;
+
+    // Bits of precision: log2(10) ~= 3.321928. Add guard bits.
     let prec = (n as f64 * 3.3219280948873626).ceil() as u32 + 20;

-    // BBP converges in base-16; use a modest overestimate for term count.
-    let num_terms = (n as usize / 1) + 20; // conservative
+    let (_p, q, t) = bs(0, terms);

-    // Use rayon thread pool to control threads for parallel work.
-    let pool = rayon::ThreadPoolBuilder::new()
-        .num_threads(num_threads)
-        .build()
-        .map_err(|e| format!("Failed to build thread pool: {}", e))?;
+    // Convert big integers to high-precision floats
+    let prec_u = prec as u32;
+    let qf = Float::with_val(prec_u, q);
+    let tf = Float::with_val(prec_u, t);

-    let pi = pool.install(|| {
-        // Parallel iterator over term indices.
-        (0..num_terms as u32)
-            .into_par_iter()
-            .map(|k| bbp_term(k, prec))
-            .reduce(|| Float::with_val(prec, 0), |a, b| a + b)
-    });
+    // C = 426880 * sqrt(10005)
+    let c = Float::with_val(prec_u, 426880) * Float::with_val(prec_u, 10005).sqrt();
+
+    let pi = c * qf / tf;

     // Convert to decimal string with a few extra digits for safe truncation.
     let extra = 10usize;
@@ -85,7 +86,6 @@ pub fn calculate_pi(n: u32, num_threads: usize) -> Result<String, String> {
     let out = if pi_string.len() >= end_pos {
         pi_string[..end_pos].to_string()
     } else {
-        // If not enough digits were produced, pad with zeros.
         let mut s = pi_string;
         if !s.contains('.') {
             s.push('.');
@@ -102,7 +102,7 @@ pub fn calculate_pi(n: u32, num_threads: usize) -> Result<String, String> {
 fn main() {
     let args = Args::parse();

-    match calculate_pi(args.n, args.threads) {
+    match calculate_pi_chudnovsky(args.n) {
         Ok(pi_str) => {
             if let Some(path) = args.output {
                 match File::create(&path) {
@@ -123,11 +123,11 @@ fn main() {

 #[cfg(test)]
 mod tests {
-    use super::calculate_pi;
+    use super::calculate_pi_chudnovsky;

     #[test]
     fn pi_10_digits() {
-        let pi = calculate_pi(10, 2).expect("calculation failed");
+        let pi = calculate_pi_chudnovsky(10).expect("calculation failed");
         assert_eq!(pi, "3.1415926535");
     }
 }

From 73d532520377d8f4997642a3931f365b9eff25dc Mon Sep 17 00:00:00 2001
From: Eric Lynema
Date: Sat, 20 Dec 2025 23:23:56 -0500
Subject: [PATCH 3/3] Parallelize binary-splitting using rayon::join

---
 src/main.rs | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 87a606a..9b79128 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,6 @@
 use clap::Parser;
 use rug::{Float, Integer, ops::Pow};
+use rayon::join;
 use std::fs::File;
 use std::io::Write;
 use std::path::PathBuf;
@@ -10,7 +11,7 @@ struct Args {
     /// Number of digits of Pi to calculate (digits after the decimal point)
     n: u32,

-    /// Number of threads to use (kept for compatibility; Chudnovsky is CPU bound)
+    /// Number of threads for the parallel binary splitting (0 lets rayon choose)
     #[arg(short, long, default_value_t = 4)]
     threads: usize,

@@ -40,8 +41,9 @@ fn bs(a: u64, b: u64) -> (Integer, Integer, Integer) {
         return (p, q, t);
     }
     let m = (a + b) / 2;
-    let (p1, q1, t1) = bs(a, m);
-    let (p2, q2, t2) = bs(m, b);
+    let (left, right) = join(|| bs(a, m), || bs(m, b));
+    let (p1, q1, t1) = left;
+    let (p2, q2, t2) = right;
     let p = (&p1 * &p2).into();
     let q = (&q1 * &q2).into();
     let t1q2: Integer = (&t1 * &q2).into();
@@ -102,7 +104,14 @@ pub fn calculate_pi_chudnovsky(n: u32) -> Result<String, String> {
 fn main() {
     let args = Args::parse();

-    match calculate_pi_chudnovsky(args.n) {
+    // Run on a dedicated pool so `--threads` bounds the rayon::join fan-out in `bs`.
+    let result = rayon::ThreadPoolBuilder::new()
+        .num_threads(args.threads)
+        .build()
+        .map_err(|e| format!("Failed to build thread pool: {}", e))
+        .and_then(|pool| pool.install(|| calculate_pi_chudnovsky(args.n)));
+
+    match result {
         Ok(pi_str) => {
             if let Some(path) = args.output {
                 match File::create(&path) {
@@ -130,4 +139,11 @@ mod tests {
         let pi = calculate_pi_chudnovsky(10).expect("calculation failed");
         assert_eq!(pi, "3.1415926535");
     }
+
+    #[test]
+    fn pi_50_digits() {
+        // The first series term alone fixes 10 digits; 50 only come out right if later terms combine correctly.
+        let pi = calculate_pi_chudnovsky(50).expect("calculation failed");
+        assert_eq!(pi, "3.14159265358979323846264338327950288419716939937510");
+    }
 }