Submit Info #64675

Problem Lang User Status Time Memory
Polynomial Taylor Shift cpp anonymous AC 91 ms 24.36 MiB

ケース詳細
Name Status Time Memory
example_00 AC 1 ms 2.32 MiB
example_01 AC 1 ms 2.32 MiB
fft_killer_00 AC 91 ms 24.36 MiB
fft_killer_01 AC 89 ms 24.24 MiB
max_random_00 AC 90 ms 24.32 MiB
max_random_01 AC 90 ms 24.31 MiB
medium_00 AC 1 ms 2.32 MiB
medium_01 AC 2 ms 2.57 MiB
medium_02 AC 2 ms 2.57 MiB
medium_all_zero_00 AC 1 ms 2.32 MiB
medium_c_zero_00 AC 1 ms 2.32 MiB
random_00 AC 83 ms 20.67 MiB
random_01 AC 87 ms 22.68 MiB
random_02 AC 11 ms 4.78 MiB
small_00 AC 1 ms 2.32 MiB
small_01 AC 1 ms 2.32 MiB
small_02 AC 1 ms 2.32 MiB
small_03 AC 1 ms 2.32 MiB
small_04 AC 1 ms 2.32 MiB
small_05 AC 1 ms 2.32 MiB
small_06 AC 1 ms 2.32 MiB
small_07 AC 1 ms 2.32 MiB
small_08 AC 1 ms 2.32 MiB
small_09 AC 1 ms 2.32 MiB
small_10 AC 1 ms 2.30 MiB
small_11 AC 1 ms 2.32 MiB
small_12 AC 1 ms 2.32 MiB
small_13 AC 1 ms 2.32 MiB
small_14 AC 1 ms 2.32 MiB
small_15 AC 1 ms 2.27 MiB

/*
 * Polynomial Taylor shift modulo 998244353.
 *
 * Reads N, c and the N coefficients of a polynomial f from stdin and prints
 * the coefficients of f(x + c).  The file bundles a small library: 32-bit
 * Montgomery arithmetic, a number-theoretic transform, and several formal
 * power series routines built on top of them.
 *
 * Allocation results are only checked when the file is built with -DLOCAL,
 * exactly as in the original submission.  Explicit casts on calloc() are
 * kept so the file also builds as C++ (the judge lists the submission as
 * "cpp").
 */
#pragma region opt
#pragma GCC optimize("O3")
#pragma GCC optimize("fast-math")
#pragma GCC optimize("unroll-loops")
#pragma GCC target("avx2,tune=native")
#pragma endregion opt

#pragma region header
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* getchar_unlocked / putchar_unlocked */
#endif
#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <time.h>
#pragma endregion header

#pragma region type
/* signed integer */
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
typedef __int128_t i128;
/* unsigned integer */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef __uint128_t u128;
/* floating point number */
typedef float f32;
typedef double f64;
typedef long double f80;
#pragma endregion type

#pragma region macro
/* NOTE: MIN/MAX evaluate their arguments twice; SWAP must not be given the same object twice. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define SWAP(a, b) (((a) ^= (b)), ((b) ^= (a)), ((a) ^= (b)))
#define POPCNT32(a) __builtin_popcount((a))
#define POPCNT64(a) __builtin_popcountll((a))
#define CTZ32(a) __builtin_ctz((a))
#define CLZ32(a) __builtin_clz((a))
#define CTZ64(a) __builtin_ctzll((a))
#define CLZ64(a) __builtin_clzll((a))
#define HAS_SINGLE_BIT32(a) (__builtin_popcount((a)) == (1))
#define HAS_SINGLE_BIT64(a) (__builtin_popcountll((a)) == (1))
#define MSB32(a) ((31) - __builtin_clz((a)))
#define MSB64(a) ((63) - __builtin_clzll((a)))
#define BIT_WIDTH32(a) ((a) ? ((32) - __builtin_clz((a))) : (0))
#define BIT_WIDTH64(a) ((a) ? ((64) - __builtin_clzll((a))) : (0))
#define LSBit(a) ((a) & (-(a)))
#define CLSBit(a) ((a) & ((a) - (1)))
#define BIT_CEIL32(a) ((!(a)) ? (1) : ((POPCNT32(a)) == (1) ? ((1u) << ((31) - CLZ32((a)))) : ((1u) << ((32) - CLZ32(a)))))
#define BIT_CEIL64(a) ((!(a)) ? (1) : ((POPCNT64(a)) == (1) ? ((1ull) << ((63) - CLZ64((a)))) : ((1ull) << ((64) - CLZ64(a)))))
#define BIT_FLOOR32(a) ((!(a)) ? (0) : ((1u) << ((31) - CLZ32((a)))))
#define BIT_FLOOR64(a) ((!(a)) ? (0) : ((1ull) << ((63) - CLZ64((a)))))
#define _ROTL64(x, s) (((x) << ((s) % (64))) | (((x) >> ((64) - ((s) % (64))))))
#define _ROTR64(x, s) (((x) >> ((s) % (64))) | (((x) << ((64) - ((s) % (64))))))
#define ROTL64(x, s) (((s) == (0)) ? (x) : ((((i128)(s)) < (0)) ? (_ROTR64((x), -(s))) : (_ROTL64((x), (s)))))
#define ROTR64(x, s) (((s) == (0)) ? (x) : ((((i128)(s)) < (0)) ? (_ROTL64((x), -(s))) : (_ROTR64((x), (s)))))
#pragma endregion macro

#pragma region io
/*
 * Readers: skip everything up to the first digit, toggling the sign on each
 * '-' seen (signed variants only), then accumulate decimal digits.
 * Hitting EOF before any digit returns 0 instead of spinning forever.
 */
static inline int read_int(void) {
    // -2147483648 ~ 2147483647 (> 10 ^ 9)
    int c, x = 0, f = 1;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
        if (c == 45) f = -f;
    }
    while (47 < c && c < 58) {
        x = x * 10 + c - 48;
        c = getchar_unlocked();
    }
    return f * x;
}

static inline i32 in_i32(void) {
    // -2147483648 ~ 2147483647 (> 10 ^ 9)
    i32 c, x = 0, f = 1;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
        if (c == 45) f = -f;
    }
    while (47 < c && c < 58) {
        x = x * 10 + c - 48;
        c = getchar_unlocked();
    }
    return f * x;
}

static inline u32 in_u32(void) {
    // 0 ~ 4294967295 (> 10 ^ 9)
    int c; /* kept as int so that EOF stays recognisable */
    u32 x = 0;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
    }
    while (47 < c && c < 58) {
        x = x * 10 + (u32)(c - 48);
        c = getchar_unlocked();
    }
    return x;
}

static inline i64 in_i64(void) {
    // -9223372036854775808 ~ 9223372036854775807 (> 10 ^ 18)
    i64 c, x = 0, f = 1;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
        if (c == 45) f = -f;
    }
    while (47 < c && c < 58) {
        x = x * 10 + c - 48;
        c = getchar_unlocked();
    }
    return f * x;
}

static inline u64 in_u64(void) {
    // 0 ~ 18446744073709551615 (> 10 ^ 19)
    int c; /* kept as int so that EOF stays recognisable */
    u64 x = 0;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
    }
    while (47 < c && c < 58) {
        x = x * 10 + (u64)(c - 48);
        c = getchar_unlocked();
    }
    return x;
}

/* Writers: recursive decimal output, most significant digit first. */
static inline void out_u32(u32 x) {
    if (x >= 10) out_u32(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void out_u64(u64 x) {
    if (x >= 10) out_u64(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void write_int_inner(int x) {
    if (x >= 10) write_int_inner(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void write_int(int x) {
    if (x < 0) {
        putchar_unlocked('-');
        /* Negate in unsigned arithmetic: -INT_MIN would overflow. */
        out_u32(0u - (u32)x);
        return;
    }
    write_int_inner(x);
}

static inline void out_i32_inner(i32 x) {
    if (x >= 10) out_i32_inner(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void out_i32(i32 x) {
    if (x < 0) {
        putchar_unlocked('-');
        out_u32(0u - (u32)x); /* safe for INT32_MIN */
        return;
    }
    out_i32_inner(x);
}

static inline void out_i64_inner(i64 x) {
    if (x >= 10) out_i64_inner(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void out_i64(i64 x) {
    if (x < 0) {
        putchar_unlocked('-');
        out_u64((u64)0 - (u64)x); /* safe for INT64_MIN */
        return;
    }
    out_i64_inner(x);
}

static inline void NL(void) { putchar_unlocked('\n'); }
static inline void SP(void) { putchar_unlocked(' '); }
#pragma endregion io

#pragma region dump
/* Debug helpers: print coloured values to stderr (one ANSI colour per type). */
void dump_int(int x) { fprintf(stderr, "\033[1;31m%d\033[0m\n", x); }
void dump_i32(i32 x) { fprintf(stderr, "\033[1;32m%d\033[0m\n", x); }
void dump_i64(i64 x) { fprintf(stderr, "\033[1;33m%" PRId64 "\033[0m\n", x); }
void dump_u32(u32 x) { fprintf(stderr, "\033[1;34m%u\033[0m\n", x); }
void dump_u64(u64 x) { fprintf(stderr, "\033[1;35m%" PRIu64 "\033[0m\n", x); }

/* Whole-array dumps: elements separated by spaces, newline after the last. */
void dump_int_array(int *a, int a_len) {
    fprintf(stderr, "\033[1;31m");
    for (int i = 0; i < a_len; i++) fprintf(stderr, "%d%c", a[i], i == a_len - 1 ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

void dump_i32_array(i32 *a, int a_len) {
    fprintf(stderr, "\033[1;32m");
    for (int i = 0; i < a_len; i++) fprintf(stderr, "%d%c", a[i], i == a_len - 1 ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

void dump_i64_array(i64 *a, int a_len) {
    fprintf(stderr, "\033[1;33m");
    for (int i = 0; i < a_len; i++) fprintf(stderr, "%" PRId64 "%c", a[i], i == a_len - 1 ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

void dump_u32_array(u32 *a, int a_len) {
    fprintf(stderr, "\033[1;34m");
    for (int i = 0; i < a_len; i++) fprintf(stderr, "%u%c", a[i], i == a_len - 1 ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

void dump_u64_array(u64 *a, int a_len) {
    fprintf(stderr, "\033[1;35m");
    for (int i = 0; i < a_len; i++) fprintf(stderr, "%" PRIu64 "%c", a[i], i == a_len - 1 ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

/*
 * Range dumps: print a[l..r] (inclusive) after clamping to [0, a_len - 1].
 * Nothing is printed for an empty or fully out-of-range interval.
 * The rejection test is r < 0 so that the one-element range [0, 0] works.
 */
void dump_int_array_range(int *a, int a_len, int l, int r) {
    if (r < l || r < 0 || l >= a_len) return;
    if (l < 0) l = 0;
    if (r >= a_len) r = a_len - 1;
    fprintf(stderr, "\033[1;31m");
    for (int i = l; i <= r; i++) fprintf(stderr, "%d%c", a[i], i == r ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

void dump_i32_array_range(i32 *a, int a_len, int l, int r) {
    if (r < l || r < 0 || l >= a_len) return;
    if (l < 0) l = 0;
    if (r >= a_len) r = a_len - 1;
    fprintf(stderr, "\033[1;32m");
    for (int i = l; i <= r; i++) fprintf(stderr, "%d%c", a[i], i == r ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

void dump_i64_array_range(i64 *a, int a_len, int l, int r) {
    if (r < l || r < 0 || l >= a_len) return;
    if (l < 0) l = 0;
    if (r >= a_len) r = a_len - 1;
    fprintf(stderr, "\033[1;33m");
    for (int i = l; i <= r; i++) fprintf(stderr, "%" PRId64 "%c", a[i], i == r ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

void dump_u32_array_range(u32 *a, int a_len, int l, int r) {
    if (r < l || r < 0 || l >= a_len) return;
    if (l < 0) l = 0;
    if (r >= a_len) r = a_len - 1;
    fprintf(stderr, "\033[1;34m");
    for (int i = l; i <= r; i++) fprintf(stderr, "%u%c", a[i], i == r ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

void dump_u64_array_range(u64 *a, int a_len, int l, int r) {
    if (r < l || r < 0 || l >= a_len) return;
    if (l < 0) l = 0;
    if (r >= a_len) r = a_len - 1;
    fprintf(stderr, "\033[1;35m");
    for (int i = l; i <= r; i++) fprintf(stderr, "%" PRIu64 "%c", a[i], i == r ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

/* Print the 64 bits of v to stdout, most significant bit first. */
void printb(u64 v) {
    u64 mask = (u64)1 << (sizeof(v) * CHAR_BIT - 1);
    do putchar(mask & v ? '1' : '0');
    while (mask >>= 1);
}

/* Print v as "0b" followed by its 64 bits and a newline. */
void putb(u64 v) { putchar('0'), putchar('b'), printb(v), putchar('\n'); }
#pragma endregion dump

#pragma region montgomery32
/*
 * 32-bit Montgomery arithmetic with R = 2^32.
 * A value of type m32 holds a residue in Montgomery form.  add/sub take
 * mod2 = 2 * mod and keep their result below mod2; mul goes through
 * _reduce_m32.  `inv` is the constant consumed by _reduce_m32.
 */
typedef uint32_t m32;

/* 2^32 mod `mod`, i.e. the Montgomery form of 1. */
m32 _one_m32(u32 mod) { return -1u % mod + 1; }

/* (2^64 - 1) mod `mod`, plus 1: the factor used by to_m32 to enter Montgomery form. */
m32 _r2_m32(u32 mod) { return (u64)(i64)-1 % mod + 1; }

/* Derive the reduction constant for `mod` with a 32-step shift/add loop. */
m32 _inv_m32(u32 mod) {
    u32 u = 1, v = 0, x = 1u << 31;
    for (int i = 0; i < 32; i++) {
        if (u & 1) u = (u + mod) >> 1, v = (v >> 1) + x;
        else u >>= 1, v >>= 1;
    }
    return -v;
}

/* Montgomery reduction of a 64-bit value; a negative intermediate is lifted by mod. */
m32 _reduce_m32(u64 a, m32 inv, u32 mod) {
    i64 z = (a >> 32) - ((((u32)a * inv) * (u64)mod) >> 32);
    return z < 0 ? z + mod : z;
}

/* Convert a plain integer into Montgomery form. */
m32 to_m32(u32 a, m32 r2, m32 inv, u32 mod) { return _reduce_m32((u64)a * r2, inv, mod); }

/* Convert out of Montgomery form; the branch-free fix-up re-adds mod when t wrapped. */
u32 from_m32(m32 A, m32 inv, u32 mod) {
    m32 t = _reduce_m32((u64)A, inv, mod) - mod;
    return t + (mod & -(t >> 31u));
}

/* A + B, folded back below mod2 without branching. */
m32 add_m32(m32 A, m32 B, u32 mod2) {
    // assert(mod2 == (mod << 1));
    A += B - mod2;
    A += mod2 & -(A >> 31u);
    return A;
}

/* A - B, folded back below mod2 without branching. */
m32 sub_m32(m32 A, m32 B, u32 mod2) {
    // assert(mod2 == (mod << 1));
    A -= B;
    A += mod2 & -(A >> 31u);
    return A;
}

/* Additive inverse ("min" is short for minus, not minimum). */
m32 min_m32(m32 A, u32 mod2) {
    // assert(mod2 == (mod << 1));
    return sub_m32(0u, A, mod2);
}

/* Montgomery product. */
m32 mul_m32(m32 A, m32 B, m32 inv, u32 mod) { return _reduce_m32((u64)A * B, inv, mod); }

/* A^n by binary exponentiation; n <= 0 yields the Montgomery form of 1. */
m32 pow_m32(m32 A, i64 n, m32 inv, u32 mod) {
    m32 ret = _one_m32(mod);
    while (n > 0) {
        if (n & 1) ret = mul_m32(ret, A, inv, mod);
        A = mul_m32(A, A, inv, mod);
        n >>= 1;
    }
    return ret;
}

/* Inverse computed as A^(mod - 2); the commented assert in div_m32 suggests mod must be prime. */
m32 inv_m32(m32 A, m32 inv, u32 mod) { return pow_m32(A, (i64)mod - 2, inv, mod); }

/* A / B. */
m32 div_m32(m32 A, m32 B, m32 inv, u32 mod) {
    /* assert(is_prime(mod)); */
    return mul_m32(A, inv_m32(B, inv, mod), inv, mod);
}

/* Read an unsigned decimal from stdin and convert it to Montgomery form (0 on EOF). */
m32 in_m32(m32 r2, m32 inv, u32 mod) {
    int c; /* kept as int so that EOF stays recognisable */
    u32 a = 0;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) break;
    }
    while (47 < c && c < 58) {
        a = a * 10 + (u32)(c - 48);
        c = getchar_unlocked();
    }
    return to_m32(a, r2, inv, mod);
}

/* Write the plain value of A to stdout. */
void out_m32(m32 A, m32 inv, u32 mod) {
    u32 a = from_m32(A, inv, mod);
    out_u32(a);
}

/* Debug: print the plain value of x to stderr. */
void dump_m32(m32 x, m32 inv, u32 mod) { fprintf(stderr, "\033[1;34m%u\033[0m\n", from_m32(x, inv, mod)); }

/* Debug: print the raw Montgomery words, then the decoded values. */
void dump_m32_array(m32 *x, int x_len, m32 inv, u32 mod) {
    fprintf(stderr, "\033[1;37m");
    for (int i = 0; i < x_len; i++) fprintf(stderr, "%u%c", x[i], i == x_len - 1 ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
    fprintf(stderr, "\033[1;33m");
    for (int i = 0; i < x_len; i++) fprintf(stderr, "%u%c", from_m32(x[i], inv, mod), i == x_len - 1 ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}

/* Debug: like dump_m32_array but restricted to x[l..r] after clamping. */
void dump_m32_array_range(m32 *x, int x_len, int l, int r, m32 inv, u32 mod) {
    if (r < l || r < 0 || l >= x_len) return;
    if (l < 0) l = 0;
    if (r >= x_len) r = x_len - 1;
    fprintf(stderr, "\033[1;31m");
    for (int i = l; i <= r; i++) fprintf(stderr, "%u%c", x[i], i == r ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
    fprintf(stderr, "\033[1;35m");
    for (int i = l; i <= r; i++) fprintf(stderr, "%u%c", from_m32(x[i], inv, mod), i == r ? '\n' : ' ');
    fprintf(stderr, "\033[0m");
}
#pragma endregion montgomery32

#pragma region ntt998244353
const u32 mod = 998244353u;
const u32 mod2 = 1996488706u; /* 2 * mod */
const m32 r2 = 932051910u;    /* conversion factor passed to to_m32 */
const m32 inv = 3296722945u;  /* reduction constant passed to _reduce_m32 */
const m32 one = 301989884u;   /* 2^32 mod 998244353: Montgomery form of 1 */
const m32 rev = 696254469u;   /* not referenced in this file; meaning not evident here (TODO confirm) */

/* Per-stage twiddle multipliers (Montgomery form) consumed by ntt(). */
const m32 gs[] = {
    691295370, 307583142, 566821959, 878217029, 375146819, 138254384,
    500602490, 79119218,  790898700, 978335284, 651424567, 308706579,
    723000027, 474797508, 683394121, 44141573,  536892010, 945865189,
    175417726, 536169764, 831722880, 721458245
};

/* Matching multipliers consumed by intt(). */
const m32 igs[] = {
    306948983, 888603487, 138723248, 65668869,  842568658, 953245971,
    195169681, 118717521, 792052763, 828450244, 908724728, 218560432,
    628507989, 248210924, 566568154, 6285593,   82571768,  49985074,
    225413092, 349167278, 61514562,  763211248
};

/*
 * In-place forward transform of A[0..A_len).  h is the smallest exponent
 * with 2^h >= A_len and the butterflies touch 2^h entries, so A_len is
 * expected to be a power of two.  The output order is whatever intt() expects.
 */
void ntt(m32 *A, int A_len) {
    int h = 0;
    while (A_len > (1 << h)) h++;
    for (int ph = 1; ph <= h; ph++) {
        int w = 1 << (ph - 1);
        int p = 1 << (h - ph);
        m32 now = one;
        for (int s = 0; s < w; s++) {
            int offset = s << (h - ph + 1);
            for (int i = 0; i < p; i++) {
                m32 l = A[i + offset];
                m32 r = mul_m32(A[i + offset + p], now, inv, mod);
                A[i + offset] = add_m32(l, r, mod2);
                A[i + offset + p] = sub_m32(l, r, mod2);
            }
            /* Advance the twiddle by the entry selected by the lowest zero bit of s. */
            now = mul_m32(now, gs[CTZ32(~s)], inv, mod);
        }
    }
}

/* In-place inverse of ntt(), including the final division by A_len. */
void intt(m32 *A, int A_len) {
    int h = 0;
    while (A_len > (1 << h)) h++;
    for (int ph = h; ph >= 1; ph--) {
        int w = 1 << (ph - 1);
        int p = 1 << (h - ph);
        m32 inow = one;
        for (int s = 0; s < w; s++) {
            int offset = s << (h - ph + 1);
            for (int i = 0; i < p; i++) {
                m32 l = A[i + offset];
                m32 r = A[i + offset + p];
                A[i + offset] = add_m32(l, r, mod2);
                A[i + offset + p] = mul_m32(sub_m32(l, r, mod2), inow, inv, mod);
            }
            inow = mul_m32(inow, igs[CTZ32(~s)], inv, mod);
        }
    }
    m32 inv2t = inv_m32(to_m32(A_len, r2, inv, mod), inv, mod);
    for (int i = 0; i < A_len; i++) A[i] = mul_m32(A[i], inv2t, inv, mod);
}

/*
 * Product of A (A_len terms) and B (B_len terms) via ntt/intt.
 * Returns a freshly allocated array of A_len + B_len - 1 coefficients that
 * the caller must free().  Both lengths must be at least 1.
 */
m32 *convolute(m32 *A, int A_len, m32 *B, int B_len) {
    int ret_len = A_len + B_len - 1;
    int c_len = BIT_CEIL32(ret_len);
    m32 *c = (m32 *)calloc(c_len, sizeof(m32));
    m32 *d = (m32 *)calloc(c_len, sizeof(m32));
    m32 *ret = (m32 *)calloc(ret_len, sizeof(m32));
#ifdef LOCAL
    if (c == NULL || d == NULL || ret == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    memcpy(c, A, sizeof(m32) * A_len);
    memcpy(d, B, sizeof(m32) * B_len);
    ntt(c, c_len);
    ntt(d, c_len);
    for (int i = 0; i < c_len; i++) c[i] = mul_m32(c[i], d[i], inv, mod);
    intt(c, c_len);
    memcpy(ret, c, sizeof(m32) * ret_len);
    free(c);
    free(d);
    return ret;
}
#pragma endregion ntt998244353

#pragma region FormalPowerSeries common
/* Tables filled by pre_fact(); all entries are in Montgomery form. */
m32 _fact[1<<20];      /* i!      */
m32 _inv_fact[1<<20];  /* 1 / i!  */
m32 _inv_table[1<<20]; /* 1 / i   (index 0 unused) */

/*
 * Fill _fact[0..n+2], _inv_fact[0..n+2] and _inv_table[1..n+1].
 * n + 2 must stay below the table size (1 << 20).
 */
void pre_fact(int n) {
    /* Seed with the Montgomery form of 1.  A raw 1 here would scale every
       _fact[] entry by R^-1 and every _inv_fact[] entry by R. */
    _fact[0] = one;
    for (int i = 0; i <= n + 1; i++) _fact[i + 1] = mul_m32(_fact[i], to_m32(i + 1, r2, inv, mod), inv, mod);
    _inv_fact[n + 2] = inv_m32(_fact[n + 2], inv, mod);
    for (int i = n + 2; i > 0; i--) _inv_fact[i - 1] = mul_m32(_inv_fact[i], to_m32(i, r2, inv, mod), inv, mod);
    for (int i = 1; i <= n + 1; i++) _inv_table[i] = mul_m32(_inv_fact[i], _fact[i - 1], inv, mod);
}
#pragma endregion FormalPowerSeries common

#pragma region FormalPowerSeries inner_product
/*
 * Coefficient-wise product of A and B, truncated or zero-padded to n terms.
 * Returns a new array of n coefficients owned by the caller.
 */
m32 *dot_fps(m32 *A, int A_len, m32 *B, int B_len, int n) {
    m32 *work = (m32 *)calloc(MAX(A_len, B_len), sizeof(m32));
    m32 *ret = (m32 *)calloc(n, sizeof(m32));
#ifdef LOCAL
    if (work == NULL || ret == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    for (int i = 0; i < MIN(A_len, B_len); i++) work[i] = mul_m32(A[i], B[i], inv, mod);
    if (n > MIN(A_len, B_len)) {
        for (int i = 0; i < MIN(A_len, B_len); i++) ret[i] = work[i];
        for (int i = MIN(A_len, B_len); i < n; i++) ret[i] = 0;
    } else {
        for (int i = 0; i < n; i++) ret[i] = work[i];
    }
    free(work);
    return ret;
}
#pragma endregion FormalPowerSeries inner_product

#pragma region FormalPowerSeries reciprocal
/*
 * First n coefficients of the reciprocal of A, built by repeatedly doubling
 * the number of known terms.  Requires A[0] != 0 and n >= 1.  The scratch
 * buffers hold 3 << 19 words, which bounds the supported n.
 * The caller frees the result.
 */
m32 *inv_fps(m32 *A, int A_len, int n) {
    // assert(A != NULL && A_len != 0);
    // assert(A[0] != 0);
    // assert(n >= 1);
    m32 *f = (m32 *)calloc(n, sizeof(m32));
    m32 *g = (m32 *)calloc(3 << 19, sizeof(m32));
    m32 *h = (m32 *)calloc(3 << 19, sizeof(m32));
#ifdef LOCAL
    if (f == NULL || g == NULL || h == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    f[0] = inv_m32(A[0], inv, mod);
    for (int m = 1; m < n; m <<= 1) {
        for (int i = 0; i < MIN(m << 1, A_len); i++) g[i] = A[i];
        for (int i = MIN(m << 1, A_len); i < (m << 1); i++) g[i] = 0;
        for (int i = 0; i < MIN(m << 1, n); i++) h[i] = f[i];
        for (int i = MIN(m << 1, n); i < (m << 1); i++) h[i] = 0;
        ntt(g, m << 1);
        ntt(h, m << 1);
        for (int i = 0; i < (m << 1); i++) g[i] = mul_m32(g[i], h[i], inv, mod);
        intt(g, m << 1);
        for (int i = 0; i < m; i++) g[i] = 0;
        ntt(g, m << 1);
        for (int i = 0; i < (m << 1); i++) g[i] = mul_m32(g[i], h[i], inv, mod);
        intt(g, m << 1);
        for (int i = m, i0 = MIN(m << 1, n); i < i0; i++) f[i] = min_m32(g[i], mod2);
    }
    free(g);
    free(h);
    return f;
}
#pragma endregion FormalPowerSeries reciprocal

#pragma region FormalPowerSeries exponential
/*
 * First n coefficients of exp(A); requires A[0] == 0, n >= 1 and a prior
 * pre_fact() call covering the indices used from _inv_table.
 * work1 and work3 hold transformed copies that are reused between steps,
 * so the statement order below is significant.  Scratch buffers hold
 * 1 << 20 words.  The caller frees the result.
 */
m32 *exp_fps(m32 *A, int A_len, int n) {
    // assert(A != NULL && A_len != 0);
    // assert(A[0] == 0);
    // assert(n >= 1);
    if (n == 1) {
        m32 *ret = (m32 *)calloc(1, sizeof(m32));
#ifdef LOCAL
        if (ret == NULL) {
            exit(EXIT_FAILURE);
        }
#endif
        ret[0] = one;
        return ret;
    }
    if (n == 2) {
        m32 *ret = (m32 *)calloc(2, sizeof(m32));
#ifdef LOCAL
        if (ret == NULL) {
            exit(EXIT_FAILURE);
        }
#endif
        ret[0] = one;
        ret[1] = 1 < A_len ? A[1] : 0;
        return ret;
    }
    m32 *work0 = (m32 *)calloc(1u << 20, sizeof(m32));
    m32 *work1 = (m32 *)calloc(1u << 20, sizeof(m32));
    m32 *work2 = (m32 *)calloc(1u << 20, sizeof(m32));
    m32 *work3 = (m32 *)calloc(1u << 20, sizeof(m32));
    m32 *ret = (m32 *)calloc(n, sizeof(m32));
#ifdef LOCAL
    if (work0 == NULL || work1 == NULL || work2 == NULL || work3 == NULL || ret == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    ret[0] = work1[0] = work1[1] = work2[0] = one;
    int m;
    for (m = 1; (m << 1) < n; m <<= 1) {
        for (int i = 0, i0 = MIN(m, A_len); i < i0; i++) work0[i] = mul_m32(to_m32(i, r2, inv, mod), A[i], inv, mod);
        for (int i = MIN(m, A_len); i < m; i++) work0[i] = 0;
        ntt(work0, m);
        for (int i = 0; i < m; i++) work0[i] = mul_m32(work0[i], work1[i], inv, mod);
        intt(work0, m);
        for (int i = 0; i < m; i++) work0[i] = sub_m32(work0[i], mul_m32(to_m32(i, r2, inv, mod), ret[i], inv, mod), mod2);
        for (int i = m; i < (m << 1); i++) work0[i] = 0;
        ntt(work0, m << 1);
        for (int i = 0; i < m; i++) work3[i] = work2[i];
        for (int i = m; i < (m << 1); i++) work3[i] = 0;
        ntt(work3, m << 1);
        for (int i = 0; i < (m << 1); i++) work0[i] = mul_m32(work0[i], work3[i], inv, mod);
        intt(work0, m << 1);
        for (int i = 0; i < m; i++) work0[i] = mul_m32(work0[i], _inv_table[m + i], inv, mod);
        for (int i = 0, i0 = MIN(m, A_len - m); i < i0; i++) work0[i] = add_m32(work0[i], A[m + i], mod2);
        for (int i = m; i < (m << 1); i++) work0[i] = 0;
        ntt(work0, m << 1);
        for (int i = 0; i < (m << 1); i++) work0[i] = mul_m32(work0[i], work1[i], inv, mod);
        intt(work0, m << 1);
        for (int i = m; i < (m << 1); i++) ret[i] = work0[i - m];
        for (int i = 0; i < (m << 1); i++) work1[i] = ret[i];
        for (int i = (m << 1); i < (m << 2); i++) work1[i] = 0;
        ntt(work1, m << 2);
        for (int i = 0; i < (m << 1); i++) work0[i] = mul_m32(work1[i], work3[i], inv, mod);
        intt(work0, m << 1);
        for (int i = 0; i < m; i++) work0[i] = 0;
        ntt(work0, m << 1);
        for (int i = 0; i < (m << 1); i++) work0[i] = mul_m32(work0[i], work3[i], inv, mod);
        intt(work0, m << 1);
        for (int i = m; i < (m << 1); i++) work2[i] = min_m32(work0[i], mod2);
    }
    /* Final step: fills ret[m..n) using two half-size transforms per operand. */
    for (int i = 0, i0 = MIN(m, A_len); i < i0; i++) work0[i] = mul_m32(to_m32(i, r2, inv, mod), A[i], inv, mod);
    for (int i = MIN(m, A_len); i < m; i++) work0[i] = 0;
    ntt(work0, m);
    for (int i = 0; i < m; i++) work0[i] = mul_m32(work0[i], work1[i], inv, mod);
    intt(work0, m);
    for (int i = 0; i < m; i++) work0[i] = sub_m32(work0[i], mul_m32(to_m32(i, r2, inv, mod), ret[i], inv, mod), mod2);
    for (int i = m; i < m + (m >> 1); i++) work0[i] = work0[i - (m >> 1)];
    for (int i = m >> 1; i < m; i++) work0[i] = 0;
    for (int i = m + (m >> 1); i < (m << 1); i++) work0[i] = 0;
    ntt(work0, m);
    ntt(work0 + m, m);
    for (int i = m; i < m + (m >> 1); i++) work3[i] = work2[i - (m >> 1)];
    for (int i = m + (m >> 1); i < (m << 1); i++) work3[i] = 0;
    ntt(work3 + m, m);
    for (int i = 0; i < m; i++) work0[m + i] = add_m32(mul_m32(work0[i], work3[m + i], inv, mod), mul_m32(work0[m + i], work3[i], inv, mod), mod2);
    for (int i = 0; i < m; i++) work0[i] = mul_m32(work0[i], work3[i], inv, mod);
    intt(work0, m);
    intt(work0 + m, m);
    for (int i = 0; i < m >> 1; i++) work0[(m >> 1) + i] = add_m32(work0[(m >> 1) + i], work0[m + i], mod2);
    for (int i = 0; i < m; i++) work0[i] = mul_m32(work0[i], _inv_table[m + i], inv, mod);
    for (int i = 0, i0 = MIN(m, A_len - m); i < i0; i++) work0[i] = add_m32(work0[i], A[m + i], mod2);
    for (int i = m; i < (m << 1); i++) work0[i] = 0;
    ntt(work0, m << 1);
    for (int i = 0; i < m << 1; i++) work0[i] = mul_m32(work0[i], work1[i], inv, mod);
    intt(work0, m << 1);
    for (int i = m; i < n; i++) ret[i] = work0[i - m];
    free(work0);
    free(work1);
    free(work2);
    free(work3);
    return ret;
}
#pragma endregion FormalPowerSeries exponential

#pragma region FormalPowerSeries derivative
/*
 * Formal derivative: A_len - 1 coefficients, or a single zero when
 * A_len <= 1.  The caller frees the result.
 */
m32 *derivative_fps(m32 *A, int A_len) {
    if (A_len <= 1) {
        m32 *ret = (m32 *)calloc(1, sizeof(m32));
#ifdef LOCAL
        if (ret == NULL) {
            exit(EXIT_FAILURE);
        }
#endif
        ret[0] = 0;
        return ret;
    }
    m32 *ret = (m32 *)calloc((A_len - 1), sizeof(m32));
#ifdef LOCAL
    if (ret == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    for (int i = 1; i < A_len; i++) ret[i - 1] = mul_m32(A[i], to_m32(i, r2, inv, mod), inv, mod);
    return ret;
}
#pragma endregion FormalPowerSeries derivative

#pragma region FormalPowerSeries integral
/*
 * Formal integral with zero constant term: A_len + 1 coefficients.
 * Needs _inv_table[1..A_len] from pre_fact().  The caller frees the result.
 */
m32 *integral_fps(m32 *A, int A_len) {
    int ret_len = A_len + 1;
    m32 *ret = (m32 *)calloc(ret_len, sizeof(m32));
#ifdef LOCAL
    if (ret == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    ret[0] = 0;
    for (int i = 1; i < ret_len; i++) ret[i] = mul_m32(A[i - 1], _inv_table[i], inv, mod);
    return ret;
}
#pragma endregion FormalPowerSeries integral

#pragma region FormalPowerSeries quotient
/*
 * First n coefficients of A / B as power series.  Requires B[0] != 0 and
 * n >= 1.  m is the largest power of two not exceeding n - 1: the first m
 * terms come from A * inv_fps(B, m) and the rest from one correction step.
 * The caller frees the result.
 */
m32 *quo_fps(m32 *A, int A_len, m32 *B, int B_len, int n) {
    // assert(A != NULL && A_len != 0 && B != NULL && B_len != 0);
    // assert(B[0] != 0);
    // assert(n >= 1);
    if (n == 1) {
        m32 *ret = (m32 *)calloc(1, sizeof(m32));
#ifdef LOCAL
        if (ret == NULL) {
            exit(EXIT_FAILURE);
        }
#endif
        ret[0] = div_m32(A[0], B[0], inv, mod);
        return ret;
    }
    const int m = 1 << (31 - __builtin_clz(n - 1));
    m32 *work0 = (m32 *)calloc(1 << 20, sizeof(m32));
    m32 *work1 = (m32 *)calloc(1 << 20, sizeof(m32));
    m32 *work2 = (m32 *)calloc(m << 1, sizeof(m32));
    m32 *ret = (m32 *)calloc(n, sizeof(m32));
#ifdef LOCAL
    if (work0 == NULL || work1 == NULL || work2 == NULL || ret == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    m32 *t0 = inv_fps(B, B_len, m);
    for (int i = 0; i < m; i++) work2[i] = t0[i];
    free(t0);
    for (int i = m; i < (m << 1); i++) work2[i] = 0;
    int mm = m << 1;
    ntt(work2, mm);
    for (int i = 0; i < MIN(m, A_len); i++) work0[i] = A[i];
    for (int i = MIN(m, A_len); i < mm; i++) work0[i] = 0;
    ntt(work0, mm);
    for (int i = 0; i < mm; i++) work0[i] = mul_m32(work0[i], work2[i], inv, mod);
    intt(work0, mm);
    for (int i = 0; i < m; i++) ret[i] = work0[i];
    for (int i = m; i < (m << 1); i++) work0[i] = 0;
    ntt(work0, mm);
    for (int i = 0; i < MIN(mm, B_len); i++) work1[i] = B[i];
    for (int i = MIN(mm, B_len); i < mm; i++) work1[i] = 0;
    ntt(work1, mm);
    for (int i = 0; i < mm; i++) work0[i] = mul_m32(work0[i], work1[i], inv, mod);
    intt(work0, mm);
    for (int i = 0; i < m; i++) work0[i] = 0;
    for (int i = m, i0 = MIN(mm, A_len); i < i0; i++) work0[i] = sub_m32(work0[i], A[i], mod2);
    ntt(work0, mm);
    for (int i = 0; i < mm; i++) work0[i] = mul_m32(work0[i], work2[i], inv, mod);
    intt(work0, mm);
    for (int i = m; i < n; i++) ret[i] = min_m32(work0[i], mod2);
    free(work0);
    free(work1);
    free(work2);
    return ret;
}
#pragma endregion FormalPowerSeries quotient

#pragma region FormalPowerSeries remainder
/*
 * First n coefficients of A - quo_fps(A, B, n) * B.
 * Both intermediate arrays are released here; the caller frees the result.
 */
m32 *rem_fps(m32 *A, int A_len, m32 *B, int B_len, int n) {
    m32 *ret = (m32 *)calloc(n, sizeof(m32));
#ifdef LOCAL
    if (ret == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    m32 *Q = quo_fps(A, A_len, B, B_len, n);
    m32 *C = convolute(Q, n, B, B_len);
    free(Q);
    if (A_len < n) {
        for (int i = 0; i < A_len; i++) ret[i] = sub_m32(A[i], C[i], mod2);
        for (int i = A_len; i < n; i++) ret[i] = min_m32(C[i], mod2);
    } else {
        for (int i = 0; i < n; i++) ret[i] = sub_m32(A[i], C[i], mod2);
    }
    free(C);
    return ret;
}
#pragma endregion FormalPowerSeries remainder

#pragma region FormalPowerSeries logarithm
/*
 * First n coefficients of log(A); requires A[0] == one.
 * Forms the series with coefficients i * A[i], divides it by A, then
 * multiplies coefficient i by 1 / i.  Needs _inv_table[1..n-1].
 * The caller frees the result.
 */
m32 *log_fps(m32 *A, int A_len, int n) {
    // assert(A != NULL && A_len != 0);
    // assert(A[0] == one);
    m32 *work = (m32 *)calloc(MIN(A_len, n), sizeof(m32));
#ifdef LOCAL
    if (work == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    for (int i = 0; i < MIN(A_len, n); i++) work[i] = A[i];
    for (int i = 0; i < MIN(A_len, n); i++) work[i] = mul_m32(work[i], to_m32(i, r2, inv, mod), inv, mod);
    m32 *ret = quo_fps(work, MIN(A_len, n), A, A_len, n);
    for (int i = 1; i < n; i++) ret[i] = mul_m32(ret[i], _inv_table[i], inv, mod);
    free(work);
    return ret;
}
#pragma endregion FormalPowerSeries logarithm

#pragma region FormalPowerSeries power
/*
 * First n coefficients of A^indx.
 * When A[0] == one the result is exp(indx * log(A)).  Otherwise indx must be
 * non-negative: the lowest non-zero coefficient A[o] is factored out, the
 * normalised series is raised recursively, and the result is shifted by
 * indx * o and scaled by A[o]^indx.  An empty or all-zero A yields all
 * zeros for indx > 0.  The caller frees the result.
 */
m32 *pow_fps(m32 *A, int A_len, i64 indx, int n) {
    // assert(n >= 1);
    if (A == NULL || A_len == 0 || A[0] != one) {
        // assert(indx >= 0);
        if (indx == 0) {
            m32 *ret = (m32 *)calloc(n, sizeof(m32));
#ifdef LOCAL
            if (ret == NULL) {
                exit(EXIT_FAILURE);
            }
#endif
            ret[0] = one;
            return ret;
        }
        /* Index of the first non-zero coefficient, or -1 if there is none
           (this also covers A_len == 0 without touching A). */
        int o = -1;
        for (int i = 0; A != NULL && i < A_len; i++) {
            if (A[i]) {
                o = i;
                break;
            }
        }
        if (o == -1 || o > (n - 1) / indx) {
            m32 *ret = (m32 *)calloc(n, sizeof(m32));
#ifdef LOCAL
            if (ret == NULL) {
                exit(EXIT_FAILURE);
            }
#endif
            return ret;
        }
        const m32 b = inv_m32(A[o], inv, mod);
        const m32 c = pow_m32(A[o], indx, inv, mod);
        const int d = MIN(n - indx * o, A_len - o);
        m32 *work0 = (m32 *)calloc(d, sizeof(m32));
#ifdef LOCAL
        if (work0 == NULL) {
            exit(EXIT_FAILURE);
        }
#endif
        for (int i = 0; i < d; i++) work0[i] = mul_m32(b, A[o + i], inv, mod);
        m32 *work1 = pow_fps(work0, d, indx, n - indx * o);
        m32 *ret = (m32 *)calloc(n, sizeof(m32));
#ifdef LOCAL
        if (ret == NULL) {
            exit(EXIT_FAILURE);
        }
#endif
        for (int i = 0; i < n - indx * o; i++) ret[indx * o + i] = mul_m32(c, work1[i], inv, mod);
        free(work0);
        free(work1);
        return ret;
    }
    /* Reduce the exponent modulo p before converting: a plain cast to u32
       would truncate large or negative exponents. */
    const i64 p = (i64)mod;
    const m32 k = to_m32((u32)(((indx % p) + p) % p), r2, inv, mod);
    m32 *B = log_fps(A, A_len, n);
    for (int i = 0; i < n; i++) B[i] = mul_m32(B[i], k, inv, mod);
    m32 *ret = exp_fps(B, n, n);
    free(B);
    return ret;
}
#pragma endregion FormalPowerSeries power

#pragma region FormalPowerSeries Taylor_shift
/*
 * Coefficients of A(x + c), where c is in Montgomery form.
 * Multiplies the reversed sequence A[i] * i! by the sequence c^i / i!,
 * reverses the product back and divides coefficient i by i!.
 * Needs _fact / _inv_fact entries up to A_len - 1 from pre_fact().
 * Returns A_len coefficients owned by the caller; for A_len <= 0 a
 * one-element zero array is returned so the result can always be freed.
 */
m32 *taylor_shift_fps(m32 *A, int A_len, m32 c) {
    if (A_len <= 0) {
        return (m32 *)calloc(1, sizeof(m32));
    }
    m32 *ret = (m32 *)calloc(A_len, sizeof(m32));
    m32 *work = (m32 *)calloc(A_len, sizeof(m32));
#ifdef LOCAL
    if (ret == NULL || work == NULL) {
        exit(EXIT_FAILURE);
    }
#endif
    for (int i = 0; i < A_len; i++) ret[A_len - 1 - i] = mul_m32(A[i], _fact[i], inv, mod);
    work[0] = one;
    /* work[i] = work[i - 1] * c / i, with 1 / i formed as _inv_fact[i] * _fact[i - 1]. */
    for (int i = 1; i < A_len; i++) work[i] = mul_m32(mul_m32(mul_m32(work[i - 1], c, inv, mod), _inv_fact[i], inv, mod), _fact[i - 1], inv, mod);
    m32 *work1 = convolute(ret, A_len, work, A_len);
    for (int i = 0; i < A_len; i++) ret[A_len - 1 - i] = work1[i];
    for (int i = 0; i < A_len; i++) ret[i] = mul_m32(ret[i], _inv_fact[i], inv, mod);
    free(work);
    free(work1);
    return ret;
}
#pragma endregion FormalPowerSeries Taylor_shift

/*
 * Program body: read N and c, then N coefficients, and print the shifted
 * coefficients separated by spaces.  Input outside 1..2^19 is ignored.
 */
void Main(void) {
    enum { MAX_N = 1 << 19 };
    static m32 A[MAX_N]; /* static: 2 MiB is too much to put on the stack */
    int n = read_int();
    if (n < 1 || n > MAX_N) return;
    pre_fact(n + 10);
    m32 c = in_m32(r2, inv, mod);
    for (int i = 0; i < n; i++) A[i] = in_m32(r2, inv, mod);
    m32 *C = taylor_shift_fps(A, n, c);
    for (int i = 0; i < n; i++) {
        if (i) SP();
        out_m32(C[i], inv, mod);
    }
    NL();
    free(C);
}

int main(void) {
    Main();
    return 0;
}